1. Load required libraries
library(ALDEx2)
library(corpcor)
library(EnvStats)
library(gdata)
library(ggplot2)
library(gplots)
library(gridExtra)
library(growthcurver)
library(igraph)
library(multtest)
library(network)
library(plyr)
library(pheatmap)
library(RColorBrewer)
library(Rtsne)
library(viridis)

1.1 Load genome annotation related to efflux pumps, metabolism and transcriptional regulation (compiled from multiple sources)

#Transcriptional regulatory network compiled from RegulonDB (http://regulondb.ccg.unam.mx).
#Rows are target genes, columns are regulators; entries are '1' (activation) or '-1' (repression).
#Loading the file provides the object 'transcriptional.network' used below.
load("../Data/Input/Ecoli_TRN.RData")
#TF identifiers: column names have the format name_locus, keep the locus tag (second field)
tf.names <- vapply(
  strsplit(colnames(transcriptional.network), split = "_"),
  function(name.parts) name.parts[2],
  character(1)
)
#efflux pump related genes (compiled from EcoCyc database and literature review)
effluxPump.genes <- read.csv("../Data/Input/efflux_pump_genes.csv", header = TRUE)$Locus
#genes of the E. coli metabolic model iJO1366 (Orth et al. 2011, MSB),
#excluding TFs and efflux pump related genes so the three categories do not share genes
metabolic.genes <- setdiff(
  read.csv("../Data/Input/genes_metabolic_model_iJO1366.csv")$Model.genes,
  union(tf.names, effluxPump.genes)
)
#create function to convert locus tag to standard gene name (e.g., b0001 to thrL)
#row names of the network (format name_locus) are the lookup table used by that function
ecoli.trn.genes <- rownames(transcriptional.network)
#Convert locus tags to standard gene names (e.g., b0001 to thrL).
#  locus.list: vector of locus tags; each one is used as a regular expression
#              against the global 'ecoli.trn.genes' (entries formatted name_locus).
#Returns a character vector of the same length as locus.list holding the text before
#the first "_" of the first matching entry, or NA when no entry matches.
#An empty locus.list yields character(0).
convert.locus.to.gene.name <- function(locus.list) {
  vapply(
    seq_along(locus.list),
    function(i) {
      hits <- grep(locus.list[i], ecoli.trn.genes)
      #no matching entry: report NA instead of failing with "subscript out of bounds"
      if (length(hits) == 0) {
        return(NA_character_)
      }
      #when several entries match, the first one is used
      strsplit(ecoli.trn.genes[hits], split = "_")[[1]][1]
    },
    character(1)
  )
}

1.2 Run differential expression analysis of transcriptional data reported by Handel et al. (2014)

#Differential expression is tested with a Bayesian t-test using the Cyber-T algorithm
#(Baldi and Long, 2001); code downloaded from http://cybert.ics.uci.edu
source("Bayesian_Ttest/cyberTtest.R")
#normalized GEO data (accession ID: GSE57084); first CSV column becomes the row names
handel.normalized.matrix <- read.csv(
  "../Data/Differential_expression_analysis/GEO_Handel2014/Handel_normalized_GEO_data.csv",
  header = TRUE,
  row.names = 1
)
#map between microarray probe IDs (row names) and loci, kept as a matrix
probes.to.loci.map <- as.matrix(
  read.csv(
    "../Data/Differential_expression_analysis/GEO_Handel2014/probes_loci_array_map.csv",
    header = TRUE,
    row.names = 1
  )
)
#relabel the expression matrix: probe IDs are replaced by the "ORF" entry of the map
rownames(handel.normalized.matrix) <- probes.to.loci.map[rownames(handel.normalized.matrix), "ORF"]
#Count the number of TFs, efflux pump genes, metabolic genes and remaining genes
#in a set of significantly up- or down-regulated genes.
#  geneSet:           vector of locus tags
#  expression.change: "UP" or "DOWN"
#Returns a 1x4 matrix with columns <prefix>.others, <prefix>.metabolic, <prefix>.tfs
#and <prefix>.eps, where <prefix> is "up" or "down". Counts are negated for "DOWN".
#Any other value of expression.change gives a warning and returns NULL.
#Side effect: prints the gene names of the TFs and efflux pump genes found in geneSet.
#Reads the globals tf.names, effluxPump.genes and metabolic.genes, and calls
#convert.locus.to.gene.name.
classify.degs <- function(geneSet, expression.change) {
  is.up <- expression.change == "UP"
  is.down <- expression.change == "DOWN"
  if (!is.up && !is.down) {
    warning("expression.change must be \"UP\" or \"DOWN\"; returning NULL", call. = FALSE)
    return(NULL)
  }
  #members of each category present in the gene set
  tf.hits <- intersect(geneSet, tf.names)
  ep.hits <- intersect(geneSet, effluxPump.genes)
  metabolic.hits <- intersect(geneSet, metabolic.genes)
  #print names of TFs and efflux pump genes; explicit print() is required because
  #values are not auto-printed inside a function
  if (length(tf.hits) != 0) {
    print(convert.locus.to.gene.name(tf.hits))
  }
  if (length(ep.hits) != 0) {
    print(convert.locus.to.gene.name(ep.hits))
  }
  #genes that fall in none of the three categories
  n.others <- length(geneSet) - (length(tf.hits) + length(ep.hits) + length(metabolic.hits))
  counts <- c(n.others, length(metabolic.hits), length(tf.hits), length(ep.hits))
  prefix <- "up"
  if (is.down) {
    #down-regulated counts are stored as negative numbers
    counts <- -1 * counts
    prefix <- "down"
  }
  matrix(
    counts,
    nrow = 1,
    dimnames = list(NULL, paste(prefix, c("others", "metabolic", "tfs", "eps"), sep = "."))
  )
}
#run differential expression analyses
#WT response to tetracycline: WT(+TET) vs WT(-TET), expression matrix columns 1 to 6
wt.response <- bayesT(
  handel.normalized.matrix[, 1:6],
  numC = 3, numE = 3, conf = 7, doMulttest = TRUE
)
#differentially expressed genes (DEGs): BH below 0.05 and
#absolute difference between group means above 1
wt.response.degs <- rownames(wt.response)[
  which(wt.response$BH < 0.05 & abs(wt.response$meanC - wt.response$meanE) > 1)
]
print(paste("WT differentially expressed", length(wt.response.degs), "genes in response to tetracycline"))
[1] "WT differentially expressed 93 genes in response to tetracycline"
#fold-change (meanE minus meanC) for all genes, named by the row names of the result
wt.response.fold.change <- setNames(
  wt.response$meanE - wt.response$meanC,
  rownames(wt.response)
)
#split DEGs by the sign of their fold-change: positive = up, negative = down
wt.deg.up <- wt.response.degs[which(wt.response.fold.change[wt.response.degs] > 0)]
wt.deg.down <- wt.response.degs[which(wt.response.fold.change[wt.response.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/wt_response_up_080822.csv",wt.deg.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/wt_response_down_080822.csv",wt.deg.down,row.names = F)
#per-category counts (other, metabolic, TF, efflux pump) for the barplot of Fig. 1A
up.wt.vector <- classify.degs(wt.deg.up, "UP")
down.wt.vector <- classify.degs(wt.deg.down, "DOWN")
#TetR baseline change: TetR(-TET) vs WT(-TET), expression matrix columns 1 to 3 and 7 to 9
tetR.basal <- bayesT(
  handel.normalized.matrix[, c(1:3, 7:9)],
  numC = 3, numE = 3, conf = 7, doMulttest = TRUE
)
#DEGs: BH below 0.05 and absolute difference between group means above 1
tetR.basal.degs <- rownames(tetR.basal)[
  which(tetR.basal$BH < 0.05 & abs(tetR.basal$meanC - tetR.basal$meanE) > 1)
]
print(paste("TetR (-TET) differentially expressed", length(tetR.basal.degs), "genes with respect to WT(-TET)"))
[1] "TetR (-TET) differentially expressed 197 genes with respect to WT(-TET)"
#fold-change (meanE minus meanC) for all genes, named by the row names of the result
tetR.basal.fold.change <- setNames(
  tetR.basal$meanE - tetR.basal$meanC,
  rownames(tetR.basal)
)
#split DEGs by the sign of their fold-change: positive = up, negative = down
tetR.basal.degs.up <- tetR.basal.degs[which(tetR.basal.fold.change[tetR.basal.degs] > 0)]
tetR.basal.degs.down <- tetR.basal.degs[which(tetR.basal.fold.change[tetR.basal.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_basal_up_080822.csv",tetR.basal.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_basal_down_080822.csv",tetR.basal.degs.down,row.names = F)
#per-category counts (other, metabolic, TF, efflux pump) for the barplot of Fig. 1A
up.tetR.basal.vector <- classify.degs(tetR.basal.degs.up, "UP")
down.tetR.basal.vector <- classify.degs(tetR.basal.degs.down, "DOWN")
#TetR response to tetracycline: TetR(+) vs TetR(-), expression matrix columns 7 to 12
tetR.response <- bayesT(
  handel.normalized.matrix[, 7:12],
  numC = 3, numE = 3, conf = 7, doMulttest = TRUE
)
#DEGs: BH below 0.05 and absolute difference between group means above 1
tetR.response.degs <- rownames(tetR.response)[
  which(tetR.response$BH < 0.05 & abs(tetR.response$meanC - tetR.response$meanE) > 1)
]
print(paste("TetR differentially expressed", length(tetR.response.degs), "genes in response to tetracycline"))
[1] "TetR differentially expressed 896 genes in response to tetracycline"
#fold-change (meanE minus meanC) for all genes, named by the row names of the result
tetR.response.fold.change <- setNames(
  tetR.response$meanE - tetR.response$meanC,
  rownames(tetR.response)
)
#split DEGs by the sign of their fold-change: positive = up, negative = down
tetR.response.degs.up <- tetR.response.degs[which(tetR.response.fold.change[tetR.response.degs] > 0)]
tetR.response.degs.down <- tetR.response.degs[which(tetR.response.fold.change[tetR.response.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_response_up_080822.csv",tetR.response.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_response_down_080822.csv",tetR.response.degs.down,row.names = F)
#per-category counts (other, metabolic, TF, efflux pump) for the barplot of Fig. 1A
up.tetR.response.vector <- classify.degs(tetR.response.degs.up, "UP")
down.tetR.response.vector <- classify.degs(tetR.response.degs.down, "DOWN")
#TetR(+TET) vs WT(+TET), expression matrix columns 4 to 6 and 10 to 12
tetR.wt.response.comparison <- bayesT(
  handel.normalized.matrix[, c(4:6, 10:12)],
  numC = 3, numE = 3, conf = 7, doMulttest = TRUE
)
#DEGs: BH below 0.05 and absolute difference between group means above 1
tetR.wt.response.comparison.degs <- rownames(tetR.wt.response.comparison)[
  which(
    tetR.wt.response.comparison$BH < 0.05 &
      abs(tetR.wt.response.comparison$meanC - tetR.wt.response.comparison$meanE) > 1
  )
]
print(paste("TetR (+TET) differentially expressed", length(tetR.wt.response.comparison.degs), "genes with respect to WT(+TET)"))
[1] "TetR (+TET) differentially expressed 1005 genes with respect to WT(+TET)"
#fold-change (meanE minus meanC) for all genes, named by the row names of the result
tetR.wt.response.comparison.fold.change <- setNames(
  tetR.wt.response.comparison$meanE - tetR.wt.response.comparison$meanC,
  rownames(tetR.wt.response.comparison)
)
#split DEGs by the sign of their fold-change: positive = up, negative = down
tetR.wt.response.comparison.degs.up <- tetR.wt.response.comparison.degs[
  which(tetR.wt.response.comparison.fold.change[tetR.wt.response.comparison.degs] > 0)
]
tetR.wt.response.comparison.degs.down <- tetR.wt.response.comparison.degs[
  which(tetR.wt.response.comparison.fold.change[tetR.wt.response.comparison.degs] < 0)
]
#per-category counts (other, metabolic, TF, efflux pump) for the barplot of Fig. 1A
up.tetR.wt.response.comparison.vector <- classify.degs(tetR.wt.response.comparison.degs.up, "UP")
down.tetR.wt.response.comparison.vector <- classify.degs(tetR.wt.response.comparison.degs.down, "DOWN")
#create Figure 1A: stacked bars of DEG counts per comparison, up-regulated counts
#above zero and down-regulated counts (stored as negative numbers) below zero
#all-zero row appended after the four comparisons to generate blank space
zero.vector <- rep(0, 4)
#one color per gene category, in the column order returned by classify.degs
colors.barplot <- c(
  "grey",
  rgb(253, 174, 97, maxColorValue = 255),
  rgb(166, 217, 106, maxColorValue = 255),
  rgb(215, 25, 28, maxColorValue = 255)
)
#generate barplot
#first, draw the up-regulated genes; this call also sets the axes, label and title
barplot(
  t(rbind(up.wt.vector, up.tetR.basal.vector, up.tetR.response.vector, up.tetR.wt.response.comparison.vector, zero.vector)),
  density = rep(15, 4),
  angle = c(0, 45, 90, 135),
  col = colors.barplot,
  ylim = c(-500, 500),
  ylab = "Number of DEGs",
  main = "Fig. 1A"
)
#then, add the down-regulated genes to the same plot
barplot(
  t(rbind(down.wt.vector, down.tetR.basal.vector, down.tetR.response.vector, down.tetR.wt.response.comparison.vector, zero.vector)),
  density = rep(15, 4),
  angle = c(0, 45, 90, 135),
  col = colors.barplot,
  add = TRUE
)
#horizontal line separating up- and down-regulated bars
abline(h = 0, col = "black", lwd = 2)

1.3 Create Fig. S1

#Fig. S1 (fold-change of transcript level of fermentation-related genes)
#fermentation-related genes compiled from EcoCyc database; the file has no header row
fermentation.ecocyc <- read.csv(
  "../Data/Input/fermentation_pathway_ecocyc.csv",
  header = FALSE
)
#Translate gene names to locus tags.
#  geneSet: vector (or factor) of gene names
#Looks up each name in the row names of the global 'transcriptional.network'
#(format name_locus). A name is translated only when exactly one row name starts
#with "<name>_"; otherwise the name itself is returned unchanged, so callers can
#spot and patch unresolved entries.
#Matching is anchored at the start of the row name, so a gene is never confused
#with another gene whose name merely ends with the same letters.
#Returns a character vector with one entry per element of geneSet
#(character(0) for an empty geneSet).
translate.gene.name.to.locus <- function(geneSet) {
  trn.names <- rownames(transcriptional.network)
  if (is.null(trn.names)) {
    trn.names <- character(0)
  }
  vapply(
    as.character(geneSet),
    function(g) {
      gene.pos <- which(startsWith(trn.names, paste0(g, "_")))
      if (length(gene.pos) == 1) {
        #locus tag is the second "_"-separated field
        strsplit(trn.names[gene.pos], split = "_")[[1]][2]
      } else {
        g
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
}
#locus tags of the fermentation genes (first column of the EcoCyc list)
fermentation.loci <- translate.gene.name.to.locus(fermentation.ecocyc[, 1])
#entries 15 and 16 are set by hand (missing loci)
fermentation.loci[15:16] <- c("b1675", "b2929")
#average expression per gene over each group of three columns; column order is
#WT(-TET), WT(+TET), TetR(-TET), TetR(+TET)
handel.average.expression <- do.call(
  cbind,
  lapply(
    list(1:3, 4:6, 7:9, 10:12),
    function(replicate.columns) rowMeans(handel.normalized.matrix[, replicate.columns])
  )
)
#average expression profile of the fermentation genes only
fermentation.pathway.average.expression.matrix <- handel.average.expression[as.vector(fermentation.loci), ]
#label rows with gene names and columns with strain/condition
rownames(fermentation.pathway.average.expression.matrix) <- as.vector(fermentation.ecocyc$V1)
colnames(fermentation.pathway.average.expression.matrix) <- c("WT(-TET)", "WT(+TET)", "TetR(-TET)", "TetR(+TET)")
#heatmap of log2 fold-change with respect to untreated WT (column 1)
#fold-change breaks of the color scale
heatmap.breaks <- c(-3, -2, -1, -0.5, 0, 0.5, 1, 2)
#8-step palette interpolated from the reversed RdBu colors, then reversed again
heatmap.colors <- rev(colorRampPalette(rev(brewer.pal(7, "RdBu")))(8))
#the fifth color is dropped so that seven colors remain for the seven intervals
pheatmap(
  fermentation.pathway.average.expression.matrix[, 2:4] - fermentation.pathway.average.expression.matrix[, 1],
  scale = "none",
  color = heatmap.colors[-5],
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize = 8,
  angle_col = 90,
  breaks = heatmap.breaks,
  legend_breaks = heatmap.breaks,
  legend = TRUE,
  main = "Fig. S1"
)

#evaluate significance of fold-change using random permutation
#NOTE(review): no random seed is set, so the p-value changes slightly between runs
#total permutations
N <- 10000
#average fold change of fermentation genes in the baseline change in TetR:
#TetR(-TET) (column 3) minus WT(-TET) (column 1)
true.average.fold.change.fermetation <- mean(
  fermentation.pathway.average.expression.matrix[, 3] - fermentation.pathway.average.expression.matrix[, 1]
)
#null distribution: same statistic for N random gene sets of the same size
random.average.fold.change <- vapply(
  seq_len(N),
  function(n) {
    random.selected.genes <- sample(seq_len(nrow(handel.normalized.matrix)), length(fermentation.loci))
    mean(handel.average.expression[random.selected.genes, 3] - handel.average.expression[random.selected.genes, 1])
  },
  numeric(1)
)
#two-sided permutation p-value: fraction of random sets whose average fold-change is
#at least as large in magnitude as the observed one (absolute value on both sides,
#so a negative observed value is handled correctly)
permutation.pval.fermentation.genes <- length(
  which(abs(random.average.fold.change) >= abs(true.average.fold.change.fermetation))
) / N
print(paste("Permutation p-value =", permutation.pval.fermentation.genes))
[1] "Permutation p-value = 0.0062"

1.4 Create Fig. S2 (acrA, acrB and acrZ expression and fitness profile in the presence of tetracycline)

#locus tags of the efflux pump genes of interest
acrA <- "b0463"
acrB <- "b0462"
acrZ <- "b0762"
#one fill color per gene, shared by the bars and the legend
acr.bar.colors <- c(
  rgb(141, 160, 203, maxColorValue = 255),
  rgb(252, 141, 98, maxColorValue = 255),
  rgb(102, 194, 165, maxColorValue = 255)
)
#create Fig. S2A - expression profiles of selected genes
#average expression in columns 2 to 4 minus untreated WT (column 1)
selected.genes.average.expression.matrix <- as.matrix(
  handel.average.expression[c(acrA, acrB, acrZ), 2:4] - handel.average.expression[c(acrA, acrB, acrZ), 1]
)
colnames(selected.genes.average.expression.matrix) <- c("WT(+)", "TetR(-)", "TetR(+)")
#single-panel layout
par(mfrow = c(1, 1))
#grouped barplot: one group per condition, one bar per gene
barplot(
  selected.genes.average.expression.matrix,
  beside = TRUE,
  col = acr.bar.colors,
  ylab = "Log2 fold-change (vs. WT control)",
  ylim = c(-0.5, 1.5),
  main = "Fig. S2A"
)
legend("topleft", fill = acr.bar.colors, legend = c("AcrA", "AcrB", "AcrZ"))

#Fig. S2B
#fitness data from Nichols et al. (Cell 2011)
fitness.scores <- read.xls(
  "../Data/Miscellaneous_files/phenotipic landscape table s2.xls",
  header = TRUE,
  fill = TRUE
)
#columns whose name contains "TETRACYCLINE"
tetracycline.conditions <- grep("TETRACYCLINE", colnames(fitness.scores))
#one fill color per box
fitness.box.colors <- c(
  rgb(141, 160, 203, maxColorValue = 255),
  rgb(252, 141, 98, maxColorValue = 255),
  rgb(102, 194, 165, maxColorValue = 255)
)
#boxplot of three hard-coded table rows, labelled acrA*, acrB* and acrZ
#NOTE(review): rows 3965, 1278 and 2819 are assumed to be those genes - confirm against the table
boxplot(
  t(fitness.scores[c(3965, 1278, 2819), tetracycline.conditions]),
  ylim = c(-12, 2),
  col = fitness.box.colors,
  outline = TRUE,
  boxlty = 1,
  whisklty = 1,
  staplelty = 1,
  names = c("acrA*", "acrB*", "acrZ"),
  frame = FALSE,
  ylab = "Deletion fitness score in tetracycline",
  cex.lab = 1.1,
  main = "Fig. S2B"
)
#dashed reference line at zero
abline(h = 0, lty = 2)

1.5 Create Fig. 1B and accompanying Table S2

#TetR(+TET) vs WT(-TET), expression matrix columns 1 to 3 and 10 to 12
tetR.plus.vs.WT.minus <- bayesT(
  handel.normalized.matrix[, c(1:3, 10:12)],
  numC = 3, numE = 3, conf = 7, doMulttest = TRUE
)
#DEGs: BH below 0.05 and absolute difference between group means above 1
tetR.plus.vs.WT.minus.degs <- rownames(tetR.plus.vs.WT.minus)[
  which(
    tetR.plus.vs.WT.minus$BH < 0.05 &
      abs(tetR.plus.vs.WT.minus$meanC - tetR.plus.vs.WT.minus$meanE) > 1
  )
]
print(paste("TetR(+TET) differentially expressed", length(tetR.plus.vs.WT.minus.degs), "genes with respect to WT(-TET)"))
[1] "TetR(+TET) differentially expressed 932 genes with respect to WT(-TET)"
#fold-change (meanE minus meanC) for all genes, named by the row names of the result
tetR.plus.vs.WT.minus.fold.change <- setNames(
  tetR.plus.vs.WT.minus$meanE - tetR.plus.vs.WT.minus$meanC,
  rownames(tetR.plus.vs.WT.minus)
)
#split DEGs by the sign of their fold-change: positive = up, negative = down
tetR.plus.vs.WT.minus.degs.up <- tetR.plus.vs.WT.minus.degs[
  which(tetR.plus.vs.WT.minus.fold.change[tetR.plus.vs.WT.minus.degs] > 0)
]
tetR.plus.vs.WT.minus.degs.down <- tetR.plus.vs.WT.minus.degs[
  which(tetR.plus.vs.WT.minus.fold.change[tetR.plus.vs.WT.minus.degs] < 0)
]
#save CSV files with up- and down-regulated genes for downstream DAVID analysis
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR+TET_WT-TET_upregulated_081522.csv",tetR.plus.vs.WT.minus.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR+TET_WT-TET_downregulated_081522.csv",tetR.plus.vs.WT.minus.degs.down,row.names = F)
#Fig. 1B - Heatmap for selected functional terms
#For each comparison, read the DAVID output (functional enrichment) of the down- and
#up-regulated genes and keep the terms with Benjamini value below 0.05.

#WT response
david.wt.response.down <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_wt_response_down.csv",
  header = TRUE
)
significant.terms.wt.response.down <- as.character(
  david.wt.response.down$Term[which(david.wt.response.down$Benjamini < 0.05)]
)
david.wt.response.up <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_wt_response_up.csv",
  header = TRUE
)
significant.terms.wt.response.up <- as.character(
  david.wt.response.up$Term[which(david.wt.response.up$Benjamini < 0.05)]
)

#TetR baseline changes
david.tetR.basal.down <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_basal_down.csv",
  header = TRUE
)
significant.terms.tetR.basal.down <- as.character(
  david.tetR.basal.down$Term[which(david.tetR.basal.down$Benjamini < 0.05)]
)
david.tetR.basal.up <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_basal_up.csv",
  header = TRUE
)
significant.terms.tetR.basal.up <- as.character(
  david.tetR.basal.up$Term[which(david.tetR.basal.up$Benjamini < 0.05)]
)

#TetR(+TET) vs WT(-TET)
david.tetR.response.down <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_plus_vs_wt_minus_down.csv",
  header = TRUE
)
significant.terms.tetR.response.down <- as.character(
  david.tetR.response.down$Term[which(david.tetR.response.down$Benjamini < 0.05)]
)
david.tetR.response.up <- read.csv(
  "../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_plus_vs_wt_minus_up.csv",
  header = TRUE
)
significant.terms.tetR.response.up <- as.character(
  david.tetR.response.up$Term[which(david.tetR.response.up$Benjamini < 0.05)]
)

#all over-represented terms, accumulated in a fixed order (WT down, WT up, TetR basal
#down, TetR basal up, TetR response down, TetR response up); the order matters because
#rows are selected by position further below
all.enriched.terms <- Reduce(
  union,
  list(
    significant.terms.wt.response.down,
    significant.terms.wt.response.up,
    significant.terms.tetR.basal.down,
    significant.terms.tetR.basal.up,
    significant.terms.tetR.response.down,
    significant.terms.tetR.response.up
  )
)
#matrix with one row per term and one column per comparison against the untreated WT strain
functional.enrichment.matrix <- matrix(
  0,
  nrow = length(all.enriched.terms),
  ncol = 3,
  dimnames = list(all.enriched.terms, c("WT(+)", "TetR(-)", "TetR(+)"))
)
#fill out the enrichment matrix: terms from down-regulated genes are -1, terms from
#up-regulated genes are 1; a term present in both lists ends up as 1 (written last)
#first column: WT(+) vs WT(-)
functional.enrichment.matrix[significant.terms.wt.response.down, "WT(+)"] <- -1
functional.enrichment.matrix[significant.terms.wt.response.up, "WT(+)"] <- 1
#second column: TetR(-) vs WT(-)
functional.enrichment.matrix[significant.terms.tetR.basal.down, "TetR(-)"] <- -1
functional.enrichment.matrix[significant.terms.tetR.basal.up, "TetR(-)"] <- 1
#third column: TetR(+) vs WT(-)
functional.enrichment.matrix[significant.terms.tetR.response.down, "TetR(+)"] <- -1
functional.enrichment.matrix[significant.terms.tetR.response.up, "TetR(+)"] <- 1
#sub-matrix with the manually selected functional terms shown in Fig. 1B (row positions)
matrix.selected.terms <- functional.enrichment.matrix[
  c(1, 15, 19, 26, 31, 23, 25, 28, 17, 39, 38, 71, 81, 51, 41, 48, 75, 77, 74, 49, 50, 61, 52, 58, 73, 82, 80, 84, 87:89),
]
#Fig. 1B: comparisons as rows, terms as columns, three-color scale in reversed bluered order
pheatmap(
  t(matrix.selected.terms),
  color = rev(bluered(3)),
  scale = "none",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize = 8,
  angle_col = 90,
  main = "Fig. 1B"
)

#create table S2
#Helper: copy the DAVID results of one list of significant terms into the table.
#  table.matrix: table being built (rows named by term, five columns)
#  terms:        significant terms to process
#  david.output: DAVID result table holding columns Term, Count, Pop.Hits and Benjamini
#  count.column: index of the comparison column (1, 2 or 3) that receives "Count"
#For every term, "Count" goes to count.column and "Pop.Hits" to column 4. The
#Benjamini value goes to column 5; if column 5 already holds a value from an earlier
#list, the new value is appended after a ";" (which turns the matrix into character).
#Returns the updated table.
#NOTE(review): assumes each term occurs once in david.output - confirm for new DAVID files
fill.tableS2.terms <- function(table.matrix, terms, david.output, count.column) {
  for (term in terms) {
    term.position <- which(david.output$Term == term)
    #fill row information
    table.matrix[term, c(count.column, 4)] <- as.matrix(david.output[term.position, c("Count", "Pop.Hits")])
    #add p-value information
    term.pval <- david.output[term.position, "Benjamini"]
    if (table.matrix[term, 5] == 0) {
      table.matrix[term, 5] <- term.pval
    } else {
      table.matrix[term, 5] <- paste(table.matrix[term, 5], term.pval, sep = ";")
    }
  }
  table.matrix
}
#initialize matrix
tableS2.matrix <- matrix(
  0,
  nrow = length(all.enriched.terms),
  ncol = 5,
  dimnames = list(
    all.enriched.terms,
    c("WT(+)", "TetR(-)", "TetR(+)", "Total number of genes associated with term", "P-value")
  )
)
#the lists are processed in the same order as before (down before up within each
#comparison), so overwritten counts and concatenated p-values come out identical
#WT response: column 1
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.wt.response.down, david.wt.response.down, 1)
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.wt.response.up, david.wt.response.up, 1)
#TetR basal response: column 2
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.tetR.basal.down, david.tetR.basal.down, 2)
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.tetR.basal.up, david.tetR.basal.up, 2)
#TetR(+TET) vs WT(-TET): column 3
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.tetR.response.down, david.tetR.response.down, 3)
tableS2.matrix <- fill.tableS2.terms(tableS2.matrix, significant.terms.tetR.response.up, david.tetR.response.up, 3)
print(tableS2.matrix)
                                                                                                WT(+) TetR(-) TetR(+)
GO:0015803~branched-chain amino acid transport                                                  "5"   "0"     "0"    
GO:0006865~amino acid transport                                                                 "8"   "0"     "0"    
GO:0098713~leucine import across plasma membrane                                                "4"   "0"     "0"    
KW-0029~Amino-acid transport                                                                    "8"   "0"     "0"    
GO:0015658~branched-chain amino acid transmembrane transporter activity                         "4"   "0"     "0"    
GO:0005304~L-valine transmembrane transporter activity                                          "4"   "0"     "0"    
GO:0015188~L-isoleucine transmembrane transporter activity                                      "4"   "0"     "0"    
GO:1903714~isoleucine transmembrane transport                                                   "4"   "0"     "0"    
eco02024:Quorum sensing                                                                         "7"   "0"     "0"    
GO:0015190~L-leucine transmembrane transporter activity                                         "4"   "0"     "0"    
GO:0015823~phenylalanine transport                                                              "4"   "0"     "0"    
GO:1903785~L-valine transmembrane transport                                                     "4"   "0"     "0"    
GO:0055052~ATP-binding cassette (ABC) transporter complex, substrate-binding subunit-containing "5"   "0"     "0"    
GO:0015192~L-phenylalanine transmembrane transporter activity                                   "3"   "0"     "0"    
eco02010:ABC transporters                                                                       "7"   "0"     "0"    
KW-0813~Transport                                                                               "13"  "0"     "0"    
KW-0346~Stress response                                                                         "7"   "19"    "38"   
KW-0805~Transcription regulation                                                                "9"   "0"     "53"   
KW-0804~Transcription                                                                           "9"   "0"     "53"   
KW-0732~Signal                                                                                  "15"  "0"     "0"    
KW-0658~Purine biosynthesis                                                                     "0"   "6"     "8"    
GO:0006164~purine nucleotide biosynthetic process                                               "0"   "6"     "0"    
GO:0006189~'de novo' IMP biosynthetic process                                                   "0"   "5"     "0"    
eco01110:Biosynthesis of secondary metabolites                                                  "0"   "20"    "130"  
GO:0044205~'de novo' UMP biosynthetic process                                                   "0"   "4"     "0"    
eco00230:Purine metabolism                                                                      "0"   "9"     "0"    
GO:0006207~'de novo' pyrimidine nucleobase biosynthetic process                                 "0"   "4"     "0"    
KW-0210~Decarboxylase                                                                           "0"   "5"     "0"    
GO:0006221~pyrimidine nucleotide biosynthetic process                                           "0"   "4"     "0"    
KW-0665~Pyrimidine biosynthesis                                                                 "0"   "4"     "7"    
eco00240:Pyrimidine metabolism                                                                  "0"   "7"     "0"    
KW-1134~Transmembrane beta strand                                                               "0"   "6"     "0"    
eco01230:Biosynthesis of amino acids                                                            "0"   "0"     "67"   
KW-0028~Amino-acid biosynthesis                                                                 "0"   "0"     "52"   
GO:0008652~cellular amino acid biosynthetic process                                             "0"   "0"     "52"   
eco01100:Metabolic pathways                                                                     "0"   "0"     "235"  
GO:0003824~catalytic activity                                                                   "0"   "0"     "93"   
eco01200:Carbon metabolism                                                                      "0"   "0"     "48"   
eco00250:Alanine, aspartate and glutamate metabolism                                            "0"   "0"     "22"   
KW-0816~Tricarboxylic acid cycle                                                                "0"   "0"     "16"   
GO:0006099~tricarboxylic acid cycle                                                             "0"   "0"     "18"   
GO:0005829~cytosol                                                                              "0"   "0"     "194"  
KW-0007~Acetylation                                                                             "0"   "0"     "40"   
eco01120:Microbial metabolism in diverse environments                                           "0"   "0"     "82"   
KW-0055~Arginine biosynthesis                                                                   "0"   "0"     "11"   
GO:0006526~arginine biosynthetic process                                                        "0"   "0"     "11"   
GO:0042802~identical protein binding                                                            "0"   "0"     "97"   
eco00190:Oxidative phosphorylation                                                              "0"   "0"     "23"   
GO:0009060~aerobic respiration                                                                  "0"   "0"     "14"   
GO:0006096~glycolytic process                                                                   "0"   "0"     "12"   
eco01210:2-Oxocarboxylic acid metabolism                                                        "0"   "0"     "16"   
eco00220:Arginine biosynthesis                                                                  "0"   "0"     "13"   
BINDING:Substrate                                                                               "0"   "0"     "58"   
KW-0324~Glycolysis                                                                              "0"   "0"     "11"   
GO:0016491~oxidoreductase activity                                                              "0"   "0"     "74"   
GO:0006541~glutamine metabolic process                                                          "0"   "0"     "10"   
eco00650:Butanoate metabolism                                                                   "0"   "0"     "18"   
eco00340:Histidine metabolism                                                                   "0"   "0"     "8"    
KW-0368~Histidine biosynthesis                                                                  "0"   "0"     "8"    
GO:0000105~histidine biosynthetic process                                                       "0"   "0"     "8"    
eco00010:Glycolysis / Gluconeogenesis                                                           "0"   "0"     "21"   
ACT_SITE:Proton donor                                                                           "0"   "0"     "35"   
GO:0030170~pyridoxal phosphate binding                                                          "0"   "0"     "20"   
KW-0315~Glutamine amidotransferase                                                              "0"   "0"     "8"    
GO:0006974~cellular response to DNA damage stimulus                                             "0"   "0"     "54"   
eco00020:Citrate cycle (TCA cycle)                                                              "0"   "0"     "15"   
GO:0051287~NAD binding                                                                          "0"   "0"     "16"   
GO:0006094~gluconeogenesis                                                                      "0"   "0"     "9"    
GO:0006520~cellular amino acid metabolic process                                                "0"   "0"     "12"   
GO:0006807~nitrogen compound metabolic process                                                  "0"   "0"     "10"   
eco00620:Pyruvate metabolism                                                                    "0"   "0"     "22"   
eco00680:Methane metabolism                                                                     "0"   "0"     "15"   
KW-0100~Branched-chain amino acid biosynthesis                                                  "0"   "0"     "9"    
KW-0830~Ubiquinone                                                                              "0"   "0"     "9"    
GO:0030964~NADH dehydrogenase complex                                                           "0"   "0"     "8"    
KW-0298~Galactitol metabolism                                                                   "0"   "0"     "5"    
KW-0874~Quinone                                                                                 "0"   "0"     "9"    
KW-0663~Pyridoxal phosphate                                                                     "0"   "0"     "20"   
eco00500:Starch and sucrose metabolism                                                          "0"   "0"     "15"   
eco00260:Glycine, serine and threonine metabolism                                               "0"   "0"     "15"   
eco00030:Pentose phosphate pathway                                                              "0"   "0"     "13"   
eco00400:Phenylalanine, tyrosine and tryptophan biosynthesis                                    "0"   "0"     "10"   
eco00310:Lysine degradation                                                                     "0"   "0"     "9"    
KW-1277~Toxin-antitoxin system                                                                  "0"   "0"     "22"   
KW-0238~DNA-binding                                                                             "0"   "0"     "65"   
GO:0003677~DNA binding                                                                          "0"   "0"     "73"   
eco00540:Lipopolysaccharide biosynthesis                                                        "0"   "0"     "11"   
KW-0281~Fimbrium                                                                                "0"   "0"     "10"   
KW-0229~DNA integration                                                                         "0"   "0"     "7"    
KW-0255~Endonuclease                                                                            "0"   "0"     "12"   
KW-0678~Repressor                                                                               "0"   "0"     "25"   
KW-0448~Lipopolysaccharide biosynthesis                                                         "0"   "0"     "14"   
KW-0540~Nuclease                                                                                "0"   "0"     "14"   
                                                                                                Total number of genes associated with term
GO:0015803~branched-chain amino acid transport                                                  "7"                                       
GO:0006865~amino acid transport                                                                 "87"                                      
GO:0098713~leucine import across plasma membrane                                                "4"                                       
KW-0029~Amino-acid transport                                                                    "79"                                      
GO:0015658~branched-chain amino acid transmembrane transporter activity                         "5"                                       
GO:0005304~L-valine transmembrane transporter activity                                          "5"                                       
GO:0015188~L-isoleucine transmembrane transporter activity                                      "5"                                       
GO:1903714~isoleucine transmembrane transport                                                   "5"                                       
eco02024:Quorum sensing                                                                         "64"                                      
GO:0015190~L-leucine transmembrane transporter activity                                         "6"                                       
GO:0015823~phenylalanine transport                                                              "6"                                       
GO:1903785~L-valine transmembrane transport                                                     "8"                                       
GO:0055052~ATP-binding cassette (ABC) transporter complex, substrate-binding subunit-containing "34"                                      
GO:0015192~L-phenylalanine transmembrane transporter activity                                   "4"                                       
eco02010:ABC transporters                                                                       "179"                                     
KW-0813~Transport                                                                               "732"                                     
KW-0346~Stress response                                                                         "150"                                     
KW-0805~Transcription regulation                                                                "337"                                     
KW-0804~Transcription                                                                           "343"                                     
KW-0732~Signal                                                                                  "479"                                     
KW-0658~Purine biosynthesis                                                                     "17"                                      
GO:0006164~purine nucleotide biosynthetic process                                               "18"                                      
GO:0006189~'de novo' IMP biosynthetic process                                                   "12"                                      
eco01110:Biosynthesis of secondary metabolites                                                  "339"                                     
GO:0044205~'de novo' UMP biosynthetic process                                                   "7"                                       
eco00230:Purine metabolism                                                                      "78"                                      
GO:0006207~'de novo' pyrimidine nucleobase biosynthetic process                                 "9"                                       
KW-0210~Decarboxylase                                                                           "21"                                      
GO:0006221~pyrimidine nucleotide biosynthetic process                                           "11"                                      
KW-0665~Pyrimidine biosynthesis                                                                 "11"                                      
eco00240:Pyrimidine metabolism                                                                  "57"                                      
KW-1134~Transmembrane beta strand                                                               "45"                                      
eco01230:Biosynthesis of amino acids                                                            "117"                                     
KW-0028~Amino-acid biosynthesis                                                                 "104"                                     
GO:0008652~cellular amino acid biosynthetic process                                             "103"                                     
eco01100:Metabolic pathways                                                                     "917"                                     
GO:0003824~catalytic activity                                                                   "375"                                     
eco01200:Carbon metabolism                                                                      "110"                                     
eco00250:Alanine, aspartate and glutamate metabolism                                            "33"                                      
KW-0816~Tricarboxylic acid cycle                                                                "23"                                      
GO:0006099~tricarboxylic acid cycle                                                             "29"                                      
GO:0005829~cytosol                                                                              "1051"                                    
KW-0007~Acetylation                                                                             "100"                                     
eco01120:Microbial metabolism in diverse environments                                           "268"                                     
KW-0055~Arginine biosynthesis                                                                   "12"                                      
GO:0006526~arginine biosynthetic process                                                        "12"                                      
GO:0042802~identical protein binding                                                            "456"                                     
eco00190:Oxidative phosphorylation                                                              "43"                                      
GO:0009060~aerobic respiration                                                                  "25"                                      
GO:0006096~glycolytic process                                                                   "19"                                      
eco01210:2-Oxocarboxylic acid metabolism                                                        "26"                                      
eco00220:Arginine biosynthesis                                                                  "18"                                      
BINDING:Substrate                                                                               "259"                                     
KW-0324~Glycolysis                                                                              "18"                                      
GO:0016491~oxidoreductase activity                                                              "356"                                     
GO:0006541~glutamine metabolic process                                                          "15"                                      
eco00650:Butanoate metabolism                                                                   "35"                                      
eco00340:Histidine metabolism                                                                   "8"                                       
KW-0368~Histidine biosynthesis                                                                  "10"                                      
GO:0000105~histidine biosynthetic process                                                       "10"                                      
eco00010:Glycolysis / Gluconeogenesis                                                           "47"                                      
ACT_SITE:Proton donor                                                                           "135"                                     
GO:0030170~pyridoxal phosphate binding                                                          "58"                                      
KW-0315~Glutamine amidotransferase                                                              "12"                                      
GO:0006974~cellular response to DNA damage stimulus                                             "250"                                     
eco00020:Citrate cycle (TCA cycle)                                                              "29"                                      
GO:0051287~NAD binding                                                                          "43"                                      
GO:0006094~gluconeogenesis                                                                      "15"                                      
GO:0006520~cellular amino acid metabolic process                                                "28"                                      
GO:0006807~nitrogen compound metabolic process                                                  "21"                                      
eco00620:Pyruvate metabolism                                                                    "59"                                      
eco00680:Methane metabolism                                                                     "33"                                      
KW-0100~Branched-chain amino acid biosynthesis                                                  "18"                                      
KW-0830~Ubiquinone                                                                              "15"                                      
GO:0030964~NADH dehydrogenase complex                                                           "14"                                      
KW-0298~Galactitol metabolism                                                                   "5"                                       
KW-0874~Quinone                                                                                 "14"                                      
KW-0663~Pyridoxal phosphate                                                                     "59"                                      
eco00500:Starch and sucrose metabolism                                                          "36"                                      
eco00260:Glycine, serine and threonine metabolism                                               "38"                                      
eco00030:Pentose phosphate pathway                                                              "32"                                      
eco00400:Phenylalanine, tyrosine and tryptophan biosynthesis                                    "21"                                      
eco00310:Lysine degradation                                                                     "18"                                      
KW-1277~Toxin-antitoxin system                                                                  "52"                                      
KW-0238~DNA-binding                                                                             "439"                                     
GO:0003677~DNA binding                                                                          "499"                                     
eco00540:Lipopolysaccharide biosynthesis                                                        "38"                                      
KW-0281~Fimbrium                                                                                "28"                                      
KW-0229~DNA integration                                                                         "14"                                      
KW-0255~Endonuclease                                                                            "45"                                      
KW-0678~Repressor                                                                               "150"                                     
KW-0448~Lipopolysaccharide biosynthesis                                                         "65"                                      
KW-0540~Nuclease                                                                                "70"                                      
                                                                                                P-value                        
GO:0015803~branched-chain amino acid transport                                                  "1.97e-06"                     
GO:0006865~amino acid transport                                                                 "1.09e-05"                     
GO:0098713~leucine import across plasma membrane                                                "1.52e-05"                     
KW-0029~Amino-acid transport                                                                    "2.35e-05"                     
GO:0015658~branched-chain amino acid transmembrane transporter activity                         "3.11e-05"                     
GO:0005304~L-valine transmembrane transporter activity                                          "3.11e-05"                     
GO:0015188~L-isoleucine transmembrane transporter activity                                      "3.11e-05"                     
GO:1903714~isoleucine transmembrane transport                                                   "2.83e-05"                     
eco02024:Quorum sensing                                                                         "6.71e-05"                     
GO:0015190~L-leucine transmembrane transporter activity                                         "4.65e-05"                     
GO:0015823~phenylalanine transport                                                              "4.52e-05"                     
GO:1903785~L-valine transmembrane transport                                                     "0.000105"                     
GO:0055052~ATP-binding cassette (ABC) transporter complex, substrate-binding subunit-containing "0.001528139"                  
GO:0015192~L-phenylalanine transmembrane transporter activity                                   "0.00217276"                   
eco02010:ABC transporters                                                                       "0.012294768"                  
KW-0813~Transport                                                                               "0.03394874"                   
KW-0346~Stress response                                                                         "0.015195191;3.54e-07;2.96e-08"
KW-0805~Transcription regulation                                                                "0.020194756;0.000159"         
KW-0804~Transcription                                                                           "0.020194756;0.000205"         
KW-0732~Signal                                                                                  "0.019683791"                  
KW-0658~Purine biosynthesis                                                                     "0.000683;0.042322923"         
GO:0006164~purine nucleotide biosynthetic process                                               "0.003311033"                  
GO:0006189~'de novo' IMP biosynthetic process                                                   "0.005360735"                  
eco01110:Biosynthesis of secondary metabolites                                                  "0.005379245;1.24e-21"         
GO:0044205~'de novo' UMP biosynthetic process                                                   "0.013707388"                  
eco00230:Purine metabolism                                                                      "0.00896889"                   
GO:0006207~'de novo' pyrimidine nucleobase biosynthetic process                                 "0.023952715"                  
KW-0210~Decarboxylase                                                                           "0.027863848"                  
GO:0006221~pyrimidine nucleotide biosynthetic process                                           "0.036542427"                  
KW-0665~Pyrimidine biosynthesis                                                                 "0.024787239;0.016901296"      
eco00240:Pyrimidine metabolism                                                                  "0.030058899"                  
KW-1134~Transmembrane beta strand                                                               "0.025790987"                  
eco01230:Biosynthesis of amino acids                                                            "9.12e-21"                     
KW-0028~Amino-acid biosynthesis                                                                 "3.82e-18"                     
GO:0008652~cellular amino acid biosynthetic process                                             "3.04e-17"                     
eco01100:Metabolic pathways                                                                     "1.7e-17"                      
GO:0003824~catalytic activity                                                                   "5.14e-08"                     
eco01200:Carbon metabolism                                                                      "8.82e-09"                     
eco00250:Alanine, aspartate and glutamate metabolism                                            "1.16e-07"                     
KW-0816~Tricarboxylic acid cycle                                                                "3.63e-07"                     
GO:0006099~tricarboxylic acid cycle                                                             "2.2e-06"                      
GO:0005829~cytosol                                                                              "2.54e-06"                     
KW-0007~Acetylation                                                                             "5.96e-07"                     
eco01120:Microbial metabolism in diverse environments                                           "1.05e-06"                     
KW-0055~Arginine biosynthesis                                                                   "2.22e-06"                     
GO:0006526~arginine biosynthetic process                                                        "1.28e-05"                     
GO:0042802~identical protein binding                                                            "5.87e-05"                     
eco00190:Oxidative phosphorylation                                                              "8.46e-06"                     
GO:0009060~aerobic respiration                                                                  "0.000443"                     
GO:0006096~glycolytic process                                                                   "0.000533"                     
eco01210:2-Oxocarboxylic acid metabolism                                                        "7.35e-05"                     
eco00220:Arginine biosynthesis                                                                  "7.35e-05"                     
BINDING:Substrate                                                                               "0.022461343"                  
KW-0324~Glycolysis                                                                              "0.000533"                     
GO:0016491~oxidoreductase activity                                                              "0.004362816"                  
GO:0006541~glutamine metabolic process                                                          "0.002348499"                  
eco00650:Butanoate metabolism                                                                   "0.000299"                     
eco00340:Histidine metabolism                                                                   "0.000374"                     
KW-0368~Histidine biosynthesis                                                                  "0.001063802"                  
GO:0000105~histidine biosynthetic process                                                       "0.00429118"                   
eco00010:Glycolysis / Gluconeogenesis                                                           "0.000546"                     
ACT_SITE:Proton donor                                                                           "0.039747134"                  
GO:0030170~pyridoxal phosphate binding                                                          "0.012233674"                  
KW-0315~Glutamine amidotransferase                                                              "0.001082243"                  
GO:0006974~cellular response to DNA damage stimulus                                             "0.008391287"                  
eco00020:Citrate cycle (TCA cycle)                                                              "0.00131638"                   
GO:0051287~NAD binding                                                                          "0.025496601"                  
GO:0006094~gluconeogenesis                                                                      "0.014112325"                  
GO:0006520~cellular amino acid metabolic process                                                "0.025133349"                  
GO:0006807~nitrogen compound metabolic process                                                  "0.036499498"                  
eco00620:Pyruvate metabolism                                                                    "0.005984965"                  
eco00680:Methane metabolism                                                                     "0.005984965"                  
KW-0100~Branched-chain amino acid biosynthesis                                                  "0.016901296"                  
KW-0830~Ubiquinone                                                                              "0.036820658"                  
GO:0030964~NADH dehydrogenase complex                                                           "0.04653206"                   
KW-0298~Galactitol metabolism                                                                   "0.016901296"                  
KW-0874~Quinone                                                                                 "0.006740967"                  
KW-0663~Pyridoxal phosphate                                                                     "0.036820658"                  
eco00500:Starch and sucrose metabolism                                                          "0.015420703"                  
eco00260:Glycine, serine and threonine metabolism                                               "0.026108837"                  
eco00030:Pentose phosphate pathway                                                              "0.037861334"                  
eco00400:Phenylalanine, tyrosine and tryptophan biosynthesis                                    "0.037861334"                  
eco00310:Lysine degradation                                                                     "0.043542725"                  
KW-1277~Toxin-antitoxin system                                                                  "2.19e-08"                     
KW-0238~DNA-binding                                                                             "2.06e-05"                     
GO:0003677~DNA binding                                                                          "0.000316"                     
eco00540:Lipopolysaccharide biosynthesis                                                        "0.000497"                     
KW-0281~Fimbrium                                                                                "0.002440319"                  
KW-0229~DNA integration                                                                         "0.009033247"                  
KW-0255~Endonuclease                                                                            "0.012831782"                  
KW-0678~Repressor                                                                               "0.012831782"                  
KW-0448~Lipopolysaccharide biosynthesis                                                         "0.034625247"                  
KW-0540~Nuclease                                                                                "0.040811911"                  

1.5 Fig. 1C-Expression profile of aerobic respiration related genes

#load the curated table of TCA cycle, electron transport chain and ATP synthase genes
aerobic.respiration.table<-read.csv("../Data/Input/aerobic_respiration_genes.csv")
aerobic.respiration.loci<-aerobic.respiration.table$Locus
#subset the average expression matrix to those loci and label the rows with the gene names from the table
aerobic.respiration.average.expression<-handel.average.expression[as.vector(aerobic.respiration.loci),]
rownames(aerobic.respiration.average.expression)<-aerobic.respiration.table$Name
#break points used both for the colour bins and for the legend ticks
heatmap.breaks<-c(-4,-2,-1,0,0.5,1,1.5)
#plot columns 2 to 4 after subtracting column 1 from each of them
#(column 1 is presumably the reference condition; confirm against handel.average.expression)
#rows and columns are kept in their original order (no clustering, no scaling)
pheatmap(
  aerobic.respiration.average.expression[,2:4]-aerobic.respiration.average.expression[,1],
  scale="none",
  color=colorRampPalette(rev(brewer.pal(6,"RdBu")))(6)[6:1],
  cluster_rows=FALSE,
  cluster_cols=FALSE,
  fontsize=8,
  angle_col=90,
  breaks=heatmap.breaks,
  legend_breaks=heatmap.breaks,
  legend=TRUE,
  main="Fig. 1C"
)

2.1 Table 1

#function to generate input files for NetSurgeon algorithm (Brent et al. PNAS 2016)
#arguments:
#  input.network: regulatory network with target genes in rows and regulators in columns;
#                 row and column names must have the format name_locus (only the locus is kept)
#  bayesian.ttest.output: differential expression table with loci as row names and the
#                 columns "BH" (q-value), "meanC", "meanE" and "fold"
#  name.output.file: prefix of the output files
#output: nothing is returned; four files named <prefix>_<date> are written to
#  ../Data/Network_analysis/NetSurgeon/Input/
#  .mtr (network as TFs x genes), .vect (signed -log2 q-values), .tfs (regulators), .orfs (targets)
create.netSurgeon.input<-function(input.network,bayesian.ttest.output,name.output.file)
{
  #helper: keep the locus tag, i.e. the second field of name_locus
  extract.locus<-function(labels)
  {
    vapply(strsplit(labels,split="_"),function(fields){fields[2]},character(1))
  }
  #compute the file prefix once so that the four files always share the same date stamp
  output.prefix<-paste0("../Data/Network_analysis/NetSurgeon/Input/",name.output.file,"_",Sys.Date())
  #change gene names (for both rows and columns) to loci in the input network
  rownames(input.network)<-extract.locus(rownames(input.network))
  colnames(input.network)<-extract.locus(colnames(input.network))
  #define genes present in both datasets (i.e., expression matrix and transcriptional network)
  genes.present.in.both.datasets<-intersect(rownames(bayesian.ttest.output),rownames(input.network))
  #filter the transcriptional network accordingly (drop=FALSE keeps the table two-dimensional)
  input.network<-input.network[genes.present.in.both.datasets,,drop=FALSE]
  #remove from the TRN those TFs with fewer than five targets
  #a logical mask is used on purpose: negative indexing with an empty which() result
  #would remove every regulator when all TFs already have at least five targets
  tf.has.enough.targets<-colSums(abs(input.network))>=5
  input.network<-input.network[,tf.has.enough.targets,drop=FALSE]
  #create input network for NetSurgeon
  #the format of the input network is TFs x Genes
  #save input file
  write.table(t(input.network),col.names=F,row.names=F,quote=F,file=paste0(output.prefix,".mtr"))
  #extract the differential expression results for the genes kept in the network
  de.table<-bayesian.ttest.output[rownames(input.network),,drop=FALSE]
  #extract q-values from Bayesian T-tests
  q.values<-de.table[,"BH"]
  #replace zeroes with minimum q-value among the non-zero values to avoid infinity during the log conversion
  #missing q-values are ignored when looking for the minimum; if no positive q-value exists,
  #the smallest positive double is used so that these genes remain significant
  position.zero.q.values<-which(q.values==0)
  if(length(position.zero.q.values)>0)
  {
    positive.q.values<-q.values[!is.na(q.values) & q.values>0]
    if(length(positive.q.values)>0)
    {
      q.values[position.zero.q.values]<-min(positive.q.values)
    } else {
      q.values[position.zero.q.values]<-.Machine$double.xmin
    }
  }
  #convert q-values of not differentially expressed genes to 1
  #(q-value above 0.05 or absolute difference between meanC and meanE below 1)
  mean.difference<-abs(de.table[,"meanC"]-de.table[,"meanE"])
  q.values[which(q.values>0.05 | mean.difference<1)]<-1
  #estimate the sign of fold-change for each gene
  fold.change.sign<-sign(de.table[,"fold"])
  #sign -log q-values based on fold change direction
  DEvector<-fold.change.sign * -log2(q.values)
  #remove any NA
  DEvector[is.na(DEvector)]<-0
  #save input files
  write.table(DEvector,sep=" ",col.names=F,row.names=F,quote=F,file=paste0(output.prefix,".vect"))
  regulators<-colnames(input.network)
  write.table(regulators,sep=" ",col.names=F,row.names=F,quote=F,file=paste0(output.prefix,".tfs"))
  targets<-rownames(input.network)
  write.table(targets,sep=" ",col.names=F,row.names=F,quote=F,file=paste0(output.prefix,".orfs"))
  invisible(NULL)
}
#create NetSurgeon input files for the three comparisons:
#WT response to tetracycline, TetR at baseline and TetR response to tetracycline
netsurgeon.comparisons<-list(WT_TET=wt.response,TetR_basal=tetR.basal,TetR_response=tetR.response)
for(comparison.name in names(netsurgeon.comparisons))
{
  create.netSurgeon.input(transcriptional.network,netsurgeon.comparisons[[comparison.name]],comparison.name)
}
#read compiled NetSurgeon output to create Table 1 (only top 15 TFs for each comparison were considered as differentially active)
netsurgeon.output<-read.csv("../Data/Network_analysis/NetSurgeon/Output/table1_raw_compiled_NetSurgeon_output.csv",header=TRUE)
#copy of the transcriptional network whose row and column names are reduced to locus tags
#(names have the format name_locus, so the second field is kept)
transcriptional.network.loci.tags<-transcriptional.network
rownames(transcriptional.network.loci.tags)<-vapply(strsplit(rownames(transcriptional.network.loci.tags),split="_"),function(fields){fields[2]},character(1))
colnames(transcriptional.network.loci.tags)<-vapply(strsplit(colnames(transcriptional.network.loci.tags),split="_"),function(fields){fields[2]},character(1))
#for every TF in the NetSurgeon output count:
#  (1) its targets that are present in the expression matrix (regulon size)
#  (2) its targets that are differentially expressed in the TetR baseline change
#  (3) its targets that are differentially expressed in the TetR response to tetracycline
temporal.table<-t(vapply(as.character(netsurgeon.output$Locus.tag),function(current.tf)
{
  current.targets<-names(which(transcriptional.network.loci.tags[,current.tf]!=0))
  c(length(intersect(current.targets,rownames(handel.normalized.matrix))),
    length(intersect(current.targets,tetR.basal.degs)),
    length(intersect(current.targets,tetR.response.degs)))
},integer(3),USE.NAMES=FALSE))
#add column names
colnames(temporal.table)<-c("Regulon size","Targets-basal","Targets-adaptive")
#combine netsurgeon output (without its columns 2 to 4) with the counts
table1<-cbind(netsurgeon.output[,-(2:4)],temporal.table)
#add standard gene names as first column
table1<-cbind(convert.locus.to.gene.name(table1[,"Locus.tag"]),table1)
colnames(table1)[1]<-"Transcription factor"
#print Table # 1, largest regulons first
print(table1[order(table1[,"Regulon size"],decreasing = TRUE),])
   Transcription factor Locus.tag Differential.activity Response Regulon size Targets-basal Targets-adaptive
14                 rpoS     b2741             Decreased Adaptive          207            20               69
24                 arcA     b4401             Increased Adaptive          167             9               66
7                   hns     b1237             Decreased    Basal          146            18               33
1                  fruR     b0080             Increased Adaptive           76             2               43
4                   lrp     b0889             Increased     Both           64             9               32
6                  phoP     b1130             Decreased    Basal           49             9               15
18                 gadE     b3512             Decreased     Both           36             9               14
9                  marA     b1531             Increased    Basal           33            11               11
13                 rcsB     b2217             Decreased    Basal           33             7               10
21                 soxS     b4062             Increased    Basal           33             7               13
11                 purR     b1658             Increased    Basal           31            10               10
23                  rob     b4396             Increased    Basal           22             8                7
12                 fliZ     b1921             Increased    Basal           20             6                6
16                 ompR     b3405             Increased     Both           13             4                6
19                 cytR     b3934             Increased Adaptive           13             0                8
5                  torR     b0995             Decreased    Basal           12             3                4
10                 dgsA     b1594             Increased Adaptive           10             0                7
17                 malT     b3418             Decreased Adaptive           10             0                8
2                  nrdR     b0413             Decreased Adaptive            9             1                4
3                  ybjK     b0846             Increased Adaptive            8             1                5
22                 adiY     b4116             Decreased     Both            8             3                5
8                  pspF     b1303             Increased     Both            7             4                5
15                 glcC     b2980             Increased    Basal            6             3                3
25                 gatR     b4498             Increased Adaptive            6             0                6
20                 birA     b3973             Decreased Adaptive            5             0                5
#evaluate how many DEGs are controlled by the 25 TFs
#all genes differentially expressed in the TetR strain background (baseline combined with adaptive response)
tetR.DEGs<-union(tetR.basal.degs,tetR.response.degs)
#fold over the TFs of Table 1, accumulating the members of each regulon that are
#differentially expressed in the basal or the adaptive response
genes.affected.by.tetR.phenotype<-Reduce(
  function(collected.genes,current.tf)
  {
    current.de.targets<-intersect(names(which(transcriptional.network.loci.tags[,current.tf]!=0)),tetR.DEGs)
    union(collected.genes,current.de.targets)
  },
  as.character(table1[,"Locus.tag"]),
  c())
print(length(genes.affected.by.tetR.phenotype))
[1] 279
#similar analysis but counting DEGs controlled by the 15 TFs associated with TetR response to tetracycline (i.e., adaptive TFs)
#define adaptive TFs: every TF of Table 1 whose response is not "Basal"
#a logical mask is used instead of negative indexing with which(): an empty which() result
#(no "Basal" TF at all) would otherwise select zero rows instead of all of them
adaptive.tfs<-table1[!(table1[,"Response"] %in% "Basal"),"Locus.tag"]
#vector with members of adaptive TFs that were differentially expressed by TetR in response to tetracycline
target.genes.tetR.response<-c()
#vector with all members of adaptive TF regulons (restricted to genes in the expression matrix)
adaptive.tfs.total.targets<-c()
for(tf in adaptive.tfs)
{
  #define current TF regulon
  current.tf.regulon<-names(which(transcriptional.network.loci.tags[,tf]!=0))
  #define members of current TF regulon that were differentially expressed as part of TetR adaptive response
  de.targets.adaptive<-intersect(current.tf.regulon,tetR.response.degs)
  #add genes into corresponding sets
  target.genes.tetR.response<-union(target.genes.tetR.response,de.targets.adaptive)
  adaptive.tfs.total.targets<-union(adaptive.tfs.total.targets,intersect(current.tf.regulon,rownames(handel.normalized.matrix)))
}
print(length(target.genes.tetR.response))
[1] 209
#compute hyper-geometric test p-value: probability of drawing at least the observed number
#of adaptive-TF targets when sampling as many genes as there are TetR response DEGs
phyper(q=length(target.genes.tetR.response)-1,
       m=length(adaptive.tfs.total.targets),
       n=nrow(handel.normalized.matrix)-length(adaptive.tfs.total.targets),
       k=length(tetR.response.degs),
       lower.tail=FALSE)
[1] 2.091052e-27

2.2 Fig 2A: interactions among differentially active TFs

#build the draft of Fig. 2A in three steps:
#  (1) collect the regulatory interactions among the TFs listed in Table 1
#  (2) group the DE targets of the adaptive TFs by their set of regulators
#  (3) merge both edge lists and plot the resulting directed graph
#initialize TF-TF network (one row per interaction: regulator name, target name, sign)
tf.tf.network<-c()
#add TF-TF interactions that involve TFs included in Table 1
for(tf in 1:nrow(table1))
{
  #current TF (locus tag is used to index the network, gene name is used as label)
  tf.locus<-as.character(table1[tf,"Locus.tag"])
  tf.name<-as.character(table1[tf,"Transcription factor"])
  #TF regulon: genes with a non-zero entry in the column of the current TF
  tf.regulon<-names(which(transcriptional.network.loci.tags[,tf.locus]!=0))
  #target TFs - excluding autoregulation
  target.tfs<-setdiff(intersect(tf.regulon,as.character(table1[,"Locus.tag"])),tf.locus)
  #add relevant interactions
  if(length(target.tfs) > 0)
  {
    #sign (activation or repression) of TF-TF interactions
    interaction.signs<-sign(transcriptional.network.loci.tags[as.character(target.tfs),tf.locus])
    #expand the TF-TF network
    tf.tf.network<-rbind(tf.tf.network,cbind(rep(tf.name,length(target.tfs)),convert.locus.to.gene.name(target.tfs),interaction.signs))
  }
}
#add column names
#NOTE(review): this assignment fails if no TF-TF interaction was found (tf.tf.network is still NULL)
colnames(tf.tf.network)<-c("Regulator","Target","Sign")
#extend the TF-TF network to take into account subset of target genes that were differentially expressed (DE) in TetR response
#focused on the 15 TFs associated with TetR adaptive response and their DE targets
#rows are the DE targets and columns are the adaptive TFs
temporal.subnetwork.matrix<-transcriptional.network.loci.tags[as.character(target.genes.tetR.response),adaptive.tfs]
#define unique profiles in transcriptional subnetwork (i.e., keeping a single row to represent all genes with the same regulators in the temporal.subnetwork.matrix)
unique.gene.profiles<-unique.matrix(temporal.subnetwork.matrix)
#groups of genes with same regulators
gene.groups<-list()
#regulators of each gene group (one row per regulator-group pair; third column is the group size)
tf.groups<-c()
for (r in 1:nrow(unique.gene.profiles))
{
  temporal.gene.group<-c()
  current.profile<-unique.gene.profiles[r,]
  #define which genes in the temporal.subnetwork.matrix have the same profile (i.e., same regulators)
  for(g in target.genes.tetR.response)
  {
    #the comparison yields a one-row matrix, so apply(...,1,all) returns a single TRUE/FALSE
    if(apply(temporal.subnetwork.matrix[g,]==t(current.profile),1,all))
    {
      temporal.gene.group<-c(temporal.gene.group,g)
    }
  }
  #only save groups with more than five genes
  if(length(temporal.gene.group)>5)
  {
    gene.groups[[length(gene.groups)+1]]<-temporal.gene.group
    #regulators of the group are the TFs with a non-zero entry in the shared profile
    group.regulators<-convert.locus.to.gene.name(names(which(current.profile!=0)))
    tf.groups<-rbind(tf.groups,cbind(group.regulators,                              rep(paste("Group",length(gene.groups),sep=""),length(group.regulators)),length(temporal.gene.group)))
   #save the members of the group to a dated csv file
   write.csv(file=paste("../Data/Network_analysis/NetSurgeon/Output/temporal_coregulated_gene_groups_adaptive_response_g",length(gene.groups),"_",Sys.Date(),".csv",sep=""),gene.groups[[length(gene.groups)]])
    }
}
#add column names
colnames(tf.groups)<-c("Regulator","Target","#number of genes")
#merge the gene clusters with the TF-TF network (only regulator and target columns are kept, so the sign is dropped)
tf.tf.network.unsigned<-rbind(tf.tf.network[,1:2],tf.groups[,1:2])
#plot resulting gene network - Fig. 2A draft. 
#the TF-TF network in manuscript was visualized with Cytoscape
#vertices are all names that appear as regulator or target; edges are directed from regulator to target
tf.tf.network.igraph.format<-graph_from_data_frame(tf.tf.network.unsigned                                              ,union(tf.tf.network.unsigned[,"Regulator"],tf.tf.network.unsigned[,"Target"]), directed = T)
selected.network.layout <- layout_nicely(tf.tf.network.igraph.format)
plot(tf.tf.network.igraph.format,layout = selected.network.layout, edge.arrow.size =0.2,vertex.label.cex=0.5,vertex.size=13,main="Fig. 2A (draft)")

#complement NetSurgeon results with analysis of E. coli EGRIN2 model (Brooks et al. 2014, MSB)
#read EGRIN2 corems (in simplified terms, a corem is a group of co-regulated genes)
ecoli.corems.table<-read.csv("../Data/Network_analysis/EGRIN_model/ecoli_egrin2_corems.csv")
#define E. coli corems: each entry of the Genes column is a ";"-separated list of genes
ecoli.corems<-lapply(as.character(ecoli.corems.table$Genes),function(corem.genes)
{
  strsplit(corem.genes,split=";",fixed = TRUE)[[1]]
})
#number of genes in each corem
corems.size<-vapply(ecoli.corems,length,integer(1))
#names of all genes included in the E. coli EGRIN model (each gene listed once, in order of first appearance)
ecoli.egrin2.genes<-unique(unlist(ecoli.corems))
#name corems as c1, c2, etc.
names(ecoli.corems)<-paste("c",seq_along(ecoli.corems),sep="")
#function to identify corems enriched with a particular (user-specified) set of genes
#arguments:
#  corem.set: named list of corems (each element is a vector of gene identifiers)
#  gene.set: vector of gene identifiers to test for over-representation
#uses the global vector ecoli.egrin2.genes as the gene universe
#returns: matrix with one row per enriched corem (BH-adjusted p-value <= 0.05 and ten or more members)
#         and the columns q (overlap), m (corem size), n (universe size minus m),
#         k (genes of gene.set present in the universe), p_value and p_value.BH
#         the result is always a matrix, also when zero or one corem is enriched
evaluate.corem.enrichment<-function(corem.set,gene.set)
{
  #matrix with p-values
  corem.enrichment<-matrix(nrow=length(corem.set),ncol=5)
  colnames(corem.enrichment)<-c('q','m','n','k','p_value')
  rownames(corem.enrichment)<-names(corem.set)
  #number of tested genes that belong to the universe (identical for every corem)
  k<-length(intersect(gene.set,unique(ecoli.egrin2.genes)))
  for(x in seq_along(corem.set))
  {
    q<-length(intersect(corem.set[[x]],gene.set))
    m<-length(corem.set[[x]])
    n<-length(ecoli.egrin2.genes)-m
    pval<-NA
    #if the intersect is bigger than zero
    if(q > 0)
    {
      #compute hypergeometric test p-value
      #with lower.tail=FALSE phyper returns P(X > x), so q-1 is needed to obtain P(X >= q)
      pval<-phyper(q-1,m,n,k,lower.tail=FALSE)
    }
    #add values to matrix
    corem.enrichment[x,]<-c(q,m,n,k,pval)
  }
  #perform multiple hypotheses testing correction (corems without overlap count as tests)
  corem.enrichment<-cbind(corem.enrichment,p_value.BH=p.adjust(corem.enrichment[,'p_value'],method='BH',n=nrow(corem.enrichment)))
  #select enriched corems with ten or more members
  final.enriched.corems<-which(corem.enrichment[,'p_value.BH']<=0.05 & corem.enrichment[,'m']> 9)
  #drop=FALSE keeps the row names when exactly one corem is enriched
  output<-corem.enrichment[final.enriched.corems,,drop=FALSE]
  output
}
#evaluate enrichment of EGRIN corems (also known as modules) with DEGs of TetR baseline and adaptive responses
modules.enriched.with.tetR.baseline<-evaluate.corem.enrichment(ecoli.corems, tetR.basal.degs)
modules.enriched.with.tetR.adaptive<-evaluate.corem.enrichment(ecoli.corems, tetR.response.degs)
#TFs whose regulon is over-represented in corems enriched with TetR baseline changes
corems.enriched.with.tetR.basal.TF.regulons<-c()
#TFs whose regulon is over-represented in corems enriched with TetR adaptive response
corems.enriched.with.tetR.adaptive.TF.regulons<-c()
#genes in corems enriched with both TetR DEGs AND targets of NetSurgeon predicted TFs
corems.enriched.with.tf.regulons.and.degs<-c()
#ID of corems enriched with both gene sets
significant.corems<-c()
for(tf in table1[,"Locus.tag"])
{
  #corems enriched with the regulon of the current TF
  regulon.corems<-rownames(evaluate.corem.enrichment(ecoli.corems,names(which(transcriptional.network.loci.tags[,tf]!=0))))
  #corems enriched with current TF regulon and TetR basal response
  shared.with.basal<-intersect(rownames(modules.enriched.with.tetR.baseline),regulon.corems)
  #corems enriched with current TF regulon and TetR adaptive response
  shared.with.adaptive<-intersect(rownames(modules.enriched.with.tetR.adaptive),regulon.corems)
  #record the TF for every response with which it shares one or more corems
  if(length(shared.with.basal)>0)
  {
    corems.enriched.with.tetR.basal.TF.regulons<-c(corems.enriched.with.tetR.basal.TF.regulons,tf)
  }
  if(length(shared.with.adaptive)>0)
  {
    corems.enriched.with.tetR.adaptive.TF.regulons<-c(corems.enriched.with.tetR.adaptive.TF.regulons,tf)
  }
  #compile the shared corems (basal first, then adaptive) and their members
  shared.corems<-c(shared.with.basal,shared.with.adaptive)
  if(length(shared.corems)>0)
  {
    significant.corems<-union(significant.corems,shared.corems)
    corems.enriched.with.tf.regulons.and.degs<-union(corems.enriched.with.tf.regulons.and.degs,unlist(ecoli.corems[shared.corems],use.names=FALSE))
  }
}
#define final set of TFs identified by both NetSurgeon and EGRIN-based analysis
tfs.detected.by.egrin.analysis<-union(corems.enriched.with.tetR.basal.TF.regulons,corems.enriched.with.tetR.adaptive.TF.regulons)
print(convert.locus.to.gene.name(tfs.detected.by.egrin.analysis))
 [1] "lrp"  "torR" "phoP" "hns"  "pspF" "marA" "purR" "fliZ" "rcsB" "rpoS" "ompR" "gadE" "fruR" "dgsA" "malT" "cytR" "birA" "soxS" "rob"  "arcA" "gatR"
#identify TFs with differential regulatory activity (alternative approach)
#we first estimate TF activity by multiplying the pseudoinverse of the transcriptional network with the expression matrix
genes.present.in.transcriptional.network.and.expression.matrix<-intersect(rownames(transcriptional.network.loci.tags),rownames(handel.normalized.matrix))
final.trn.network<-transcriptional.network.loci.tags[genes.present.in.transcriptional.network.and.expression.matrix,]
#remove TFs with less than five target genes
#negative indexing with an empty index would remove every TF, so only subset when there is something to remove
tfs.with.few.targets<-which(colSums(abs(final.trn.network))<5)
if(length(tfs.with.few.targets)>0)
{
  final.trn.network<-final.trn.network[,-tfs.with.few.targets,drop=FALSE]
}
#keep only shared genes in the expression matrix
tetracycline.expression.matrix<-as.matrix(handel.normalized.matrix[genes.present.in.transcriptional.network.and.expression.matrix,])
#compute pseudoinverse of the transcriptional network
pseudoinverse.matrix <- pseudoinverse(final.trn.network)
#compute TF activities (rows are TFs, columns are the samples of the expression matrix)
estimated.tf.activities <- pseudoinverse.matrix %*% tetracycline.expression.matrix
rownames(estimated.tf.activities)<- colnames(final.trn.network)
#perform t-test to compare estimated TFAs across strains/conditions
#vector with p-values (initialized here but not filled in this section)
tfa.pvals<-c()
#vector with regulon size (initialized here but not filled in this section)
regulon.size<-c()
#sample columns of each group
#NOTE(review): group labels follow the comparison descriptions below; confirm against the column order of handel.normalized.matrix
wt.untreated.columns<-1:3
tetR.untreated.columns<-7:9
tetR.treated.columns<-10:12
#vector with p-values for TetR basal response: TetR(-TET) vs WT(-TET)
pvalues.activity.tetR.basaline<-vapply(seq_len(nrow(estimated.tf.activities)),function(r)
{
  t.test(estimated.tf.activities[r,tetR.untreated.columns],estimated.tf.activities[r,wt.untreated.columns])$p.value
},numeric(1))
#vector with p-values for TetR adaptive response: TetR(+TET) vs TetR(-TET)
pvalues.activity.tetR.adaptive<-vapply(seq_len(nrow(estimated.tf.activities)),function(r)
{
  t.test(estimated.tf.activities[r,tetR.untreated.columns],estimated.tf.activities[r,tetR.treated.columns])$p.value
},numeric(1))
#perform multiple hypothesis correction
ttest.activities.matrix<-cbind(estimated.tf.activities,pvalues.activity.tetR.basaline,p.adjust(pvalues.activity.tetR.basaline,method = "BH"),pvalues.activity.tetR.adaptive,p.adjust(pvalues.activity.tetR.adaptive,method = "BH"))
#name the four appended columns; their position is derived from the number of samples instead of being hard-coded
pvalue.column.names<-c("p.value.baseline","adj.p.value.baseline","p.value.adaptive","adj.p.value.adaptive")
colnames(ttest.activities.matrix)[ncol(estimated.tf.activities)+1:4]<-pvalue.column.names
#Check change in activity of the 25 TFs previously identified by NetSurgeon
print(cbind(convert.locus.to.gene.name(table1[,"Locus.tag"]),round(ttest.activities.matrix[as.character(table1[,"Locus.tag"]),pvalue.column.names],digits=4)))
             p.value.baseline adj.p.value.baseline p.value.adaptive adj.p.value.adaptive
b0080 "fruR" "0.4182"         "0.5933"             "0.0642"         "0.1186"            
b0413 "nrdR" "0.3079"         "0.4878"             "0.0016"         "0.0142"            
b0846 "ybjK" "0.0274"         "0.1738"             "0.0342"         "0.0758"            
b0889 "lrp"  "0.0096"         "0.0979"             "0.0097"         "0.0347"            
b0995 "torR" "5e-04"          "0.0286"             "0.3503"         "0.407"             
b1130 "phoP" "0.0678"         "0.2446"             "0.2795"         "0.3311"            
b1237 "hns"  "0.0305"         "0.1738"             "0.0093"         "0.0343"            
b1303 "pspF" "0.0043"         "0.0757"             "3e-04"          "0.0079"            
b1531 "marA" "0"              "0.0047"             "0.5063"         "0.5529"            
b1594 "dgsA" "0.624"          "0.7538"             "0.0063"         "0.0286"            
b1658 "purR" "0.2"            "0.3896"             "0.5076"         "0.5529"            
b1921 "fliZ" "0.0152"         "0.1424"             "0.3155"         "0.3701"            
b2217 "rcsB" "0.7908"         "0.8463"             "0.1533"         "0.2078"            
b2741 "rpoS" "0.0722"         "0.2446"             "0.0081"         "0.0318"            
b2980 "glcC" "0.0682"         "0.2446"             "0.04"           "0.0814"            
b3405 "ompR" "0.0655"         "0.2446"             "0.1112"         "0.1696"            
b3418 "malT" "0.7271"         "0.8003"             "0.0215"         "0.0546"            
b3512 "gadE" "0.0011"         "0.0431"             "0.0016"         "0.0142"            
b3934 "cytR" "0.0291"         "0.1738"             "0.0013"         "0.0136"            
b3973 "birA" "0.2412"         "0.4445"             "0.0574"         "0.1129"            
b4062 "soxS" "0.0225"         "0.1738"             "0.0209"         "0.0541"            
b4116 "adiY" "0.0078"         "0.0916"             "8e-04"          "0.0108"            
b4396 "rob"  "0.0708"         "0.2446"             "0.0282"         "0.0673"            
b4401 "arcA" "0.8351"         "0.8768"             "0.0074"         "0.0309"            
b4498 "gatR" "0.616"          "0.7538"             "0.0119"         "0.0377"            

2.3 Fig. 2B - Fold change of selected TF regulons

#define matrix with (average) fold-change values with respect to untreated WT
#column 1 of handel.average.expression is subtracted from each of columns 2 to 4 (values are differences, not ratios)
#NOTE(review): column 1 is taken to be untreated WT, as stated above - confirm against handel.average.expression
handel.foldchange.matrix<-handel.average.expression[,2:4]-handel.average.expression[,1]
#function to compute significance of fold-change of a given set of genes vs random selection
#arguments:
#  true.genes: rows of handel.foldchange.matrix (names or indices) that form the gene set
#  column.of.fold.change.matrix: column of handel.foldchange.matrix to evaluate
#  N: number of random gene samplings (defaults to 10000, the value that was previously hard-coded)
#uses the global matrix handel.foldchange.matrix
#returns: empirical p-value, i.e. the smallest of the upper-tail, lower-tail and two-tailed
#         fractions of random gene sets (same size as true.genes) with an average at least as extreme
permutation.function<-function(true.genes,column.of.fold.change.matrix,N=10000)
{
  number.of.genes<-length(true.genes)
  true.average.fold.change<-mean(handel.foldchange.matrix[true.genes,column.of.fold.change.matrix])
  #average fold-change of N random gene sets (sampled without replacement)
  random.compilation<-vapply(seq_len(N),function(n)
  {
    random.rows<-sample.int(nrow(handel.foldchange.matrix),number.of.genes)
    mean(handel.foldchange.matrix[random.rows,column.of.fold.change.matrix])
  },numeric(1))
  #minimum of the one-tailed and two-tailed estimates
  #all three terms are fractions of N (the two-tailed term used to be a raw count)
  #note: the two-tailed fraction is never smaller than the one-tailed fraction in the observed direction,
  #so the result equals the smaller one-tailed fraction
  estimated.pvalue<-min(c(sum(random.compilation >= true.average.fold.change,na.rm=TRUE)/N,
                          sum(random.compilation <= true.average.fold.change,na.rm=TRUE)/N,
                          sum(abs(random.compilation)>=abs(true.average.fold.change),na.rm=TRUE)/N))
  #return estimated p-value
  estimated.pvalue
}
#generate boxplots for selected TFs (predicted as differentially active based on network analyses above)
selected.tfs<-c("arcA","marA","gadE")
#strain/condition labels, one per column of handel.foldchange.matrix (in column order)
strain.levels<-c("WT(+)","TetR(-)","TetR(+)")
#labels of the comparisons evaluated with the permutation test (same order as the columns)
comparison.labels<-c("-WT+TET vs WT-TET:","-TetR-TET vs WT-TET:","-TetR+TET vs WT-TET:")
for(tf.name in selected.tfs)
{
  tf.locus<-translate.gene.name.to.locus(tf.name)
  repression.mode<-(tf.name=="arcA")
  if(repression.mode)
  {
    #focus on ArcA repressed genes
    #ArcA is mainly a repressor: it represses 73.2% of its targets
    #furthermore, 80% of ArcA DE target genes are repressed by ArcA
    #only DEGs (either in the TetR baseline or adaptive response) are kept
    current.regulon.repression<-intersect(names(which(final.trn.network[,tf.locus]<0)),tetR.DEGs)
    current.targets<-current.regulon.repression
    interaction.symbol<-"--|"
    boxplot.color<-"red"
  }
  else
  {
    #if TF is MarA or GadE
    #both MarA and GadE mainly act as activators (i.e., positively regulate >79% of their target genes)
    #only DEGs (either in the TetR baseline or adaptive response) are kept
    current.regulon.activation<-intersect(names(which(final.trn.network[,tf.locus]>0)),tetR.DEGs)
    current.targets<-current.regulon.activation
    interaction.symbol<-"-->"
    boxplot.color<-"green"
  }
  #nothing to plot or test when no target is differentially expressed
  #(a permutation test on an empty gene set would report a meaningless p-value)
  if(length(current.targets)==0)
  {
    warning(paste("no differentially expressed targets found for",tf.name,"- skipping",sep=" "))
    next
  }
  #fold-change table with DE target genes: one row per gene and strain/condition
  temporal.fold.change.matrix<-data.frame(
    Expression=as.numeric(as.matrix(handel.foldchange.matrix[current.targets,1:3,drop=FALSE])),
    Strain=factor(rep(strain.levels,each=length(current.targets)),levels=strain.levels,ordered=TRUE))
  #create boxplot with ggplot
  current.boxplot<-ggplot(temporal.fold.change.matrix, aes(x=Strain, y=Expression,fill=Strain)) +
    geom_boxplot(color=boxplot.color)+ theme(text = element_text(size=24))
  #keep the plot objects under the names used previously
  if(repression.mode)
  {
    barplot.arcA<-current.boxplot
  }
  else
  {
    barplot.activation<-current.boxplot
  }
  print(current.boxplot+scale_fill_manual(values=rep("white",3)) +
    ggtitle(paste(tf.name,interaction.symbol,length(current.targets),"genes",sep=" ")))
  #run random permutation test for each comparison (e.g., WT+TET vs WT-TET, TetR-TET vs WT-TET, etc.)
  for(comparison in seq_along(comparison.labels))
  {
    print(paste(tf.name,comparison.labels[comparison],permutation.function(current.targets,comparison),sep=""))
  }
}
[1] "arcA-WT+TET vs WT-TET:0.3709"
[1] "arcA-TetR-TET vs WT-TET:0.0972"
[1] "arcA-TetR+TET vs WT-TET:0"
[1] "marA-WT+TET vs WT-TET:0.3962"
[1] "marA-TetR-TET vs WT-TET:0"
[1] "marA-TetR+TET vs WT-TET:8e-04"
[1] "gadE-WT+TET vs WT-TET:0"
[1] "gadE-TetR-TET vs WT-TET:0"
[1] "gadE-TetR+TET vs WT-TET:0"

2.4 Evaluating overlap between DEGs in TetR adaptive response and arcA KO in anaerobic growth (GEO accession ID: GSE1107)

#transcriptional data for arcA KO was sourced from Covert et al. (Nature 2004)
arcA.ko.matrix.covert<-read.csv("../Data/Differential_expression_analysis/GEO_Covert2004/arcA_ko_Covert2004.csv",header=TRUE,row.names = 1)
#select replicates for WT (4) and arcA KO (3) in anaerobic condition
anaerobic.samples<-c("ana_wt1","ana_wt2","ana_wt3","ana_wt4","ana_arcA1","ana_arcA2","ana_arcA3")
arcA.ko.matrix.covert<-arcA.ko.matrix.covert[,anaerobic.samples]
#rename columns (prefix "ana_" becomes "anaerobic_")
colnames(arcA.ko.matrix.covert)<-sub("^ana_","anaerobic_",anaerobic.samples)
#read microarray probes to loci map
arcA.microarray.probes.loci.map<-read.csv("../Data/Differential_expression_analysis/GEO_Covert2004/arcA_covert2004_probes_gene_map.csv")
#filter out those genes not included in the expression matrix for WT and TetR previously analyzed
genes.shared.with.handel.data<-arcA.microarray.probes.loci.map$ORF %in% rownames(handel.normalized.matrix)
arcA.microarray.probes.loci.map<-arcA.microarray.probes.loci.map[genes.shared.with.handel.data,]
#set of genes present in the resulting expression matrix
genes.included.in.covert.data<-as.character(arcA.microarray.probes.loci.map$ORF)
#change probe names to loci: look up the row(s) whose probe name matches each locus,
#then extract all of them in a single step (same row order as the probe map)
matching.rows<-unlist(lapply(genes.included.in.covert.data,function(current.gene)
{
  grep(current.gene,rownames(arcA.ko.matrix.covert))
}))
#this is the final matrix with locus tags as rownames
final.arcA.ko.matrix.covert<-arcA.ko.matrix.covert[matching.rows,]
rownames(final.arcA.ko.matrix.covert)<-genes.included.in.covert.data
#log2 transform the microarray data
final.arcA.ko.matrix.covert<-log2(final.arcA.ko.matrix.covert)
#perform differential expression analysis (as previously done above)
arcA.ko.response.covert<-bayesT(final.arcA.ko.matrix.covert,numC = 4,numE = 3,conf = 7,doMulttest = TRUE)
#DEGs due to arcA deletion: BH-adjusted p-value below 0.05 and absolute difference of group means above 1
significant.in.arcA.ko<-arcA.ko.response.covert$BH < 0.05
large.change.in.arcA.ko<-abs(arcA.ko.response.covert$meanC - arcA.ko.response.covert$meanE)>1
arcA.degs.covert<-rownames(arcA.ko.response.covert)[which(significant.in.arcA.ko & large.change.in.arcA.ko)]
#define overlap between DEGs in arcA KO and TetR response to tetracycline
overlapping.DEGs<-intersect(arcA.degs.covert,tetR.response.degs)
#evaluate overlap significance with hypergeometric test
#(TetR response DEGs are restricted to the genes present in the arcA KO data)
overlap.pvalue<-phyper(q=length(overlapping.DEGs)-1,
                       m=length(arcA.degs.covert),
                       n=nrow(arcA.ko.response.covert)-length(arcA.degs.covert),
                       k=length(intersect(tetR.response.degs,rownames(arcA.ko.response.covert))),
                       lower.tail = FALSE)
print(paste("Overlap p-value=",overlap.pvalue,sep=""))
[1] "Overlap p-value=5.46797704435238e-12"

3.1 Fig 3A (initial analysis of barcode sequencing data of genome-wide single gene KO library growth competition assays)

#read counts per design for TetR KO library (4 biological replicates per time point)
#this is the output of the InscriptaResolver  software 
tetR.ko.library.reads<-read.csv("../Data/KO_library_competition/raw_ko_count_data/TetR/a05h/a05h_design_report.csv",header=T)
#define unique samples (i.e., combinations of strain x replicate x treatment x time point)
#the following notation was used to label the samples: strain (tetR or wt)-K(for KO){a,b,c,d}(corresponding to the biological replicate)-'plus' or 'minus' tetracycline - {1,2,3} corresponding to the growth cycle (i.e., T1, T2, T3)
#0-plusTet and 0-minusTet correspond to T0 and they are the same sample (eg, TetR-Ka-minusTet is the same as TetR-Ka-0-plusTet) 
#T0 samples were duplicated when processing the data to facilitate (treatment-specific) downstream analyses   
tetR.ko.library.samples<-unique(tetR.ko.library.reads$SampleName)
#define KO designs
tetR.ko.library.designs<-unique(tetR.ko.library.reads$DesignId)
#create read count matrix that includes a column with the name of the gene associated with each KO design
#tetR.ko.library.count.matrix<-matrix(ncol=length(tetR.ko.library.samples)+1,nrow=length(tetR.ko.library.designs),0,
#                             dimnames = list(tetR.ko.library.designs,c("gene",tetR.ko.library.samples)))
#loop to fill out the count matrix
#for(s in tetR.ko.library.samples)
#{
#  print(s)
#  for(d in tetR.ko.library.designs)
#  {
#    current.position<-which(tetR.ko.library.reads$SampleName==s & tetR.ko.library.reads$DesignId==d)
#    current.count<-tetR.ko.library.reads[current.position,"DesignCount"]
#    tetR.ko.library.count.matrix[as.character(d),s]<- current.count
#    tetR.ko.library.count.matrix[as.character(d),"gene"]<-tetR.ko.library.reads[current.position,"Design_TargetName"]
#  }
#}
#tetR.ko.library.count.matrix.with.gene.info<-tetR.ko.library.count.matrix
#load previously compiled (with code shown above) TetR KO library read count matrix
load("../Data/KO_library_competition/processed_count_data/tetR.ko.library.count.matrix.with.gene.info.RData")
#re-organize columns (ie, sample replicates) so that they line up with the labels assigned below
#NOTE(review): column 1 (presumably the "gene" column) is not among the selected columns - confirm
tetR.ko.read.counts<-tetR.ko.library.count.matrix.with.gene.info[,c(10,2,26,18,12,4,28,20,13,5,29,21,14,6,30,22,15,7,31,23,16,8,32,24,17,9,33,25)]
#change column names to a simpler notation: Cycle-(+/-)TET-replicate{a,b,c,d}
colnames(tetR.ko.read.counts)<-paste0(rep(c("C0","C1-TET-","C2-TET-","C3-TET-","C1+TET-","C2+TET-","C3+TET-"),each=4),letters[1:4])
#Ci indicates cycle # i in the competition assay. This means that C0=t0, C1=t1, etc.
#create a new matrix that only contains numeric values: every column is converted with as.numeric and the columns are bound in one step
#(the column index is local to the anonymous function, so no global variable named 'c' is left behind and the matrix is not grown inside a loop)
tetR.ko.read.counts.numeric<-do.call(cbind,lapply(seq_len(ncol(tetR.ko.read.counts)),function(column.index)
{
  as.numeric(tetR.ko.read.counts[,column.index])
}))
#restore sample (column) and KO design (row) names
colnames(tetR.ko.read.counts.numeric)<-colnames(tetR.ko.read.counts)
rownames(tetR.ko.read.counts.numeric)<-rownames(tetR.ko.read.counts)
#repeat process for WT KO library
#per-design read counts for the WT KO libraries (4 biological replicates per time point)
#this table is the output of the InscriptaResolver software
wt.ko.library.reads<-read.csv(
  file="../Data/KO_library_competition/raw_ko_count_data/WT/a05h/a05h_design_report.csv",
  header=TRUE
)
#unique samples (with same notation used for the TetR KO library)
wt.ko.library.samples<-unique(wt.ko.library.reads$SampleName)
#unique KO designs
wt.ko.library.designs<-unique(wt.ko.library.reads$DesignId)
#create read count matrix that includes a column with the name of the gene associated with each KO design
#wt.ko.library.count.matrix<-matrix(ncol=length(wt.ko.library.samples)+1,nrow=length(wt.ko.library.designs),0,
#                           dimnames = list(as.character(wt.ko.library.designs),c("gene",wt.ko.library.samples)))
#loop to fill out the count matrix
#for(s in wt.ko.library.samples)
#{
#  print(s)
#  for(d in wt.ko.library.designs)
#  {
#  current.position<-which(wt.ko.library.reads$SampleName==s & wt.ko.library.reads$DesignId==d)
#  current.count<-wt.ko.library.reads[current.position,"DesignCount"]
#  wt.ko.library.count.matrix[as.character(d),s]<- current.count
#  wt.ko.library.count.matrix[as.character(d),"gene"]<-wt.ko.library.reads[current.position,"Design_TargetName"]
#  }
#}
#wt.ko.library.count.matrix.with.gene.info<-wt.ko.library.count.matrix
#load previously compiled WT KO library read count matrix
load("../Data/KO_library_competition/processed_count_data/wt.ko.library.count.matrix.with.gene.info.RData")
#re-organize columns (ie, sample replicates) so that they line up with the labels assigned below
wt.ko.read.counts<-wt.ko.library.count.matrix.with.gene.info[,c(10,2,26,18,12,4,28,20,13,5,29,21,14,6,30,22,15,7,31,23,16,8,32,24,17,9,33,25)]
#change column names using the simpler notation already applied to the TetR matrix: Cycle-(+/-)TET-replicate{a,b,c,d}
colnames(wt.ko.read.counts)<-colnames(tetR.ko.read.counts)
#create a new matrix that only contains numeric values: every column is converted with as.numeric and the columns are bound in one step
#(the column index is local to the anonymous function, so no global variable named 'c' is left behind and the matrix is not grown inside a loop)
wt.ko.read.counts.numeric<-do.call(cbind,lapply(seq_len(ncol(wt.ko.read.counts)),function(column.index)
{
  as.numeric(wt.ko.read.counts[,column.index])
}))
#restore sample (column) and KO design (row) names
colnames(wt.ko.read.counts.numeric)<-colnames(wt.ko.read.counts)
rownames(wt.ko.read.counts.numeric)<-rownames(wt.ko.read.counts)
#define depleted (ie, undetected) KO designs at each time point
#a KO design was considered not detected at a time point of interest when all four biological replicates had less than 10 reads for the corresponding KO design
#list (one element per time point) to store KO designs labeled as depleted in TetR library
tetR.depleted.designs.time.series<-setNames(vector("list",7),c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)"))
#vector with total number of detected KO designs per sample/time point
tetR.non.depleted.designs.vector<-setNames(numeric(7),names(tetR.depleted.designs.time.series))
#the designs '500955653' and '500955654' are excluded from this analysis because they were used as internal controls and they are not associated with any gene
tetR.candidate.designs<-setdiff(tetR.ko.library.designs,c("500955653","500955654"))
#a loop to define depleted KO designs at each time point
for(tetR.sample in 1:7)
{
  #columns of the count matrix that hold the four replicates of the current time point
  replicate.columns<-((4*tetR.sample)-3):(4*tetR.sample)
  #largest read count across the four replicates, computed for all candidate designs at once
  max.count.per.design<-apply(tetR.ko.read.counts.numeric[as.character(tetR.candidate.designs),replicate.columns,drop=FALSE],1,max)
  #stop on missing counts instead of silently treating them as detected
  stopifnot(!anyNA(max.count.per.design))
  #a design is depleted when even its best replicate has less than 10 reads
  depleted.ko.designs.current.sample<-tetR.candidate.designs[which(max.count.per.design<10)]
  #assigning through list(...) keeps the element in place even if no design is depleted (assigning NULL with [[<- would delete it and shift the time points)
  tetR.depleted.designs.time.series[tetR.sample]<-list(depleted.ko.designs.current.sample)
  #8271 is the hard-coded total number of KO designs used by this analysis
  #NOTE(review): presumably the library size without the two control designs - confirm
  tetR.non.depleted.designs.vector[tetR.sample]<-8271-length(depleted.ko.designs.current.sample)
}
#same analysis for WT
#list (one element per time point) to store KO designs labeled as depleted in WT library
wt.depleted.designs.time.series<-setNames(vector("list",7),c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)"))
#vector with total number of detected KO designs per sample/time point
wt.non.depleted.designs.vector<-setNames(numeric(7),names(wt.depleted.designs.time.series))
#the two internal control designs ('500955653' and '500955654') are excluded, as done for the TetR library
wt.candidate.designs<-setdiff(wt.ko.library.designs,c("500955653","500955654"))
for(wt.sample in 1:7)
{
  #columns of the count matrix that hold the four replicates of the current time point
  replicate.columns<-((4*wt.sample)-3):(4*wt.sample)
  #largest read count across the four replicates, computed for all candidate designs at once
  max.count.per.design<-apply(wt.ko.read.counts.numeric[as.character(wt.candidate.designs),replicate.columns,drop=FALSE],1,max)
  #stop on missing counts instead of silently treating them as detected
  stopifnot(!anyNA(max.count.per.design))
  #a design is depleted when even its best replicate has less than 10 reads
  depleted.designs.current.sample<-wt.candidate.designs[which(max.count.per.design<10)]
  #assigning through list(...) keeps the element in place even if no design is depleted (assigning NULL with [[<- would delete it and shift the time points)
  wt.depleted.designs.time.series[wt.sample]<-list(depleted.designs.current.sample)
  #8271 is the hard-coded total number of KO designs used by this analysis
  #NOTE(review): presumably the library size without the two control designs - confirm
  wt.non.depleted.designs.vector[wt.sample]<-8271-length(depleted.designs.current.sample)
}
#create left panel of Fig. 3A: number of detected KO designs per growth cycle
#two panels side by side (the right panel is drawn further below)
par(mfrow=c(1,2))
#TetR(-TET): elements 1:4 are T0, T1(-), T2(-), T3(-); this call also sets up the axes
plot(
  x=0:3,y=tetR.non.depleted.designs.vector[1:4],
  type="o",lty=2,pch=20,col="red",
  xlab="Cycle",ylab="# mutant designs",ylim=c(5000,8050),
  cex=1.5,cex.lab=1.5,cex.axis=1.5
)
#TetR(+TET): elements 1 and 5:7 are T0, T1(+), T2(+), T3(+)
points(x=0:3,y=tetR.non.depleted.designs.vector[c(1,5:7)],type="o",pch=20,col="red4")
#WT(-TET)
points(x=0:3,y=wt.non.depleted.designs.vector[1:4],type="o",lty=2,pch=20,col="blue")
#WT(+TET)
points(x=0:3,y=wt.non.depleted.designs.vector[c(1,5:7)],type="o",pch=20,col="blue4")
#solid lines = with tetracycline, dashed lines = without tetracycline
legend(
  x=0,y=6800,
  legend=c("TetR(+)","TetR(-)","WT(+)","WT(-)"),
  col=c("red4","red","blue4","blue"),
  lty=c(1,2,1,2),pch=rep(20,4),
  cex=1.1,box.lwd=0
)
#perform similar analysis to identify genes whose KOs were undetectable
#a gene was considered undetected (ie, depleted) if its two KO designs were labeled as depleted in the previous analysis
#list (one element per time point) to store names of genes labeled as depleted in TetR library
tetR.depleted.genes<-setNames(vector("list",7),c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)"))
#vector with total number of detected genes per sample/time point
tetR.non.depleted.genes.count.vector<-numeric(7)
for(tetR.sample in 1:7)
{
  #KO designs labeled as depleted in current sample
  depleted.designs.current.sample<-tetR.depleted.designs.time.series[[tetR.sample]]
  #gene associated with each depleted design
  rows.of.depleted.designs<-which(rownames(tetR.ko.library.count.matrix.with.gene.info)%in%depleted.designs.current.sample)
  depleted.design.to.gene.map<-tetR.ko.library.count.matrix.with.gene.info[rows.of.depleted.designs,"gene"]
  #number of depleted designs per gene
  temporal.gene.count.table<-table(depleted.design.to.gene.map)
  #depleted genes are those with exactly two depleted designs
  tetR.depleted.genes[[tetR.sample]]<-names(which(temporal.gene.count.table==2))
  #4153 is the hard-coded total number of genes used by this analysis
  tetR.non.depleted.genes.count.vector[tetR.sample]<-4153-length(tetR.depleted.genes[[tetR.sample]])
}
#list (one element per time point) to store names of genes labeled as depleted in WT library
wt.depleted.genes<-setNames(vector("list",7),c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)"))
#vector with total number of detected genes per sample/time point
wt.non.depleted.genes.count.vector<-numeric(7)
for(wt.sample in 1:7)
{
  #KO designs labeled as depleted in current sample
  depleted.designs.current.sample<-wt.depleted.designs.time.series[[wt.sample]]
  #gene associated with each depleted design
  rows.of.depleted.designs<-which(rownames(wt.ko.library.count.matrix.with.gene.info)%in%depleted.designs.current.sample)
  depleted.design.to.gene.map<-wt.ko.library.count.matrix.with.gene.info[rows.of.depleted.designs,"gene"]
  #number of depleted designs per gene
  temporal.gene.count.table<-table(depleted.design.to.gene.map)
  #depleted genes are those with exactly two depleted designs
  wt.depleted.genes[[wt.sample]]<-names(which(temporal.gene.count.table==2))
  #4153 is the hard-coded total number of genes used by this analysis
  wt.non.depleted.genes.count.vector[wt.sample]<-4153-length(wt.depleted.genes[[wt.sample]])
}
#create right panel of Fig. 3A: number of detected genes per growth cycle
#TetR(-TET): elements 1:4 are T0, T1(-), T2(-), T3(-); this call also sets up the axes
plot(
  x=0:3,y=tetR.non.depleted.genes.count.vector[1:4],
  type="o",lty=2,pch=20,col="red",
  xlab="Cycle",ylab="# detected genes",ylim=c(3200,4120),
  cex=1.5,cex.lab=1.5,cex.axis=1.5
)
#TetR(+TET): elements 1 and 5:7 are T0, T1(+), T2(+), T3(+)
points(x=0:3,y=tetR.non.depleted.genes.count.vector[c(1,5:7)],type="o",pch=20,col="red4")
#WT(-TET)
points(x=0:3,y=wt.non.depleted.genes.count.vector[1:4],type="o",lty=2,pch=20,col="blue")
#WT(+TET)
points(x=0:3,y=wt.non.depleted.genes.count.vector[c(1,5:7)],type="o",pch=20,col="blue4")
#solid lines = with tetracycline, dashed lines = without tetracycline
legend(
  x=0,y=3800,
  legend=c("TetR(+)","TetR(-)","WT(+)","WT(-)"),
  col=c("red4","red","blue4","blue"),
  lty=c(1,2,1,2),pch=rep(20,4),
  cex=1.1,box.lwd=0
)

#print summary table (rows = time points, taken from the names of the first vector; columns = detected KO designs/genes per library)
output.summary<-cbind(
  "TetR-detected KO designs"=tetR.non.depleted.designs.vector,
  "TetR-detected genes"=tetR.non.depleted.genes.count.vector,
  "WT-detected KO designs"=wt.non.depleted.designs.vector,
  "WT-detected genes"=wt.non.depleted.genes.count.vector
)
print(output.summary)
      TetR-detected KO designs TetR-detected genes WT-detected KO designs WT-detected genes
T0                        7993                4106                   7908              4085
T1(-)                     7911                4084                   7859              4069
T2(-)                     7845                4066                   7814              4058
T3(-)                     7842                4066                   7789              4054
T1(+)                     7933                4087                   7868              4075
T2(+)                     7465                3973                   7887              4078
T3(+)                     5359                3343                   7543              3978

3.2 Create Fig. 3B

#write CSV file with gene name and locus tag of all genes identified as depleted in the last time point (T3) of the TetR+TET competition experiment due to tetracycline (this set does not include genes not detected at T0 or T3-TET)
#this file is the input for DAVID (https://david.ncifcrf.gov/home.jsp) functional annotation clustering analysis
#genes.depleted.in.tetR.due.to.tet<-setdiff(tetR.depleted.genes[["T3(+)"]],union(tetR.depleted.genes[["T0"]],tetR.depleted.genes[["T3(-)"]]))
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_depleted_genes_t3_due_to_tetracycline.csv",cbind(genes.depleted.in.tetR.due.to.tet,translate.gene.name.to.locus(genes.depleted.in.tetR.due.to.tet)))
#create barplot for Fig 3B (gene counts per functional category are hard-coded from the DAVID output)
par(mfrow=c(1,2))
#empty first panel; the barplot is drawn in the second one
plot.new()
barplot(
  height=c(116,74,28,27,24,18,16,11,11),
  names.arg=c("Nucleotide/ATP binding","Ribosome/Translation/rRNA-binding","LPS biosynthesis","Cell division","Cell wall org./cell shape","Biosynthesis of nucleotide sugars","DNA replication","Ribosome biogenesis","Ubiquinone biosynthesis"),
  horiz=TRUE,las=2,col=rainbow(9),
  xlab="# genes",xlim=c(0,120),main="Fig. 3B",
  cex.lab=1.25,cex.axis=1.25,cex.names=1
)

3.3 Load and process ALDEx2 output

#the script below runs ALDEx2 and generates relevant outputs (eg, T1 vs T0 with and without tetracycline in the TetR and WT libraries)
#source("aldex2_analysis.R")
#load ALDEx2 output
load("../Data/KO_library_competition/processed_count_data/ALDEx2_output.RData")
#notation of the ALDEx2 output: strain(tetR/wt).ko.iqlr(for inter-quartile log ratio).c0(this is T0).c#i(where i is 1 or 2 for T1 or T2).plus/minus(tetracycline treatment)
#function to define genes whose deletion impact fitness
#that is, genes for which all of their non-depleted designs were over- or under-represented in the ALDEx2 test
#arguments:
#  design.set: KO designs selected as differentially abundant (defined using ALDEx2)
#  aldex.output.matrix: ALDEx2 output table; only its row names (the designs tested by ALDEx2) are used
#  read.count.matrix: table with KO designs as row names and a "gene" column mapping each design to its gene
#returns the genes (in order of first appearance among the selected designs) for which the number of selected designs
#equals the number of designs tested by ALDEx2; NULL when no gene qualifies
define.high.confindence.genes<-function(design.set,aldex.output.matrix,read.count.matrix)
{
  all.designs<-rownames(read.count.matrix)
  #gene of every selected (differentially abundant) design
  genes.of.selected.designs<-read.count.matrix[which(all.designs%in%design.set),"gene"]
  #gene of every design that was part of the ALDEx2 input
  genes.of.tested.designs<-read.count.matrix[which(all.designs%in%rownames(aldex.output.matrix)),"gene"]
  candidate.genes<-as.character(unique(genes.of.selected.designs))
  #a gene is kept when all of its tested designs are in the selected set
  all.designs.selected<-vapply(candidate.genes,function(current.gene)
  {
    sum(genes.of.selected.designs==current.gene,na.rm=TRUE)==sum(genes.of.tested.designs==current.gene,na.rm=TRUE)
  },logical(1),USE.NAMES=FALSE)
  output<-candidate.genes[all.designs.selected]
  #an empty result is reported as NULL
  if(length(output)==0)
  {
    output<-NULL
  }
  output
}
#############################################################
#adjusted p-value (qvalue) and ALDEx2-estimated effect thresholds used in the analysis
qval.threshold<-0.1
effect.threshold<-2
#helper: KO designs (row names of an ALDEx2 output table) with we.eBH and wi.eBH below qval.threshold and an effect beyond effect.threshold
#direction="deleterious" keeps under-represented designs (effect < -effect.threshold); any other value keeps over-represented designs (effect > effect.threshold)
select.aldex.designs<-function(aldex.table,direction)
{
  passes.qvalue<-aldex.table$we.eBH < qval.threshold & aldex.table$wi.eBH < qval.threshold
  if(direction=="deleterious")
  {
    passes.effect<-aldex.table$effect < -1*effect.threshold
  }
  else
  {
    passes.effect<-aldex.table$effect > effect.threshold
  }
  rownames(aldex.table)[which(passes.qvalue & passes.effect)]
}
#process ALDEx2 output for TetR+TET (T1) vs TetR (T0) (tetR.ko.iqlr.c0.c1.plus)
#under-represented (deleterious) and over-represented (beneficial) KO designs
tetR.deleterious.feautures.c0.c1.plus<-select.aldex.designs(tetR.ko.iqlr.c0.c1.plus,"deleterious")
tetR.beneficial.feautures.c0.c1.plus<-select.aldex.designs(tetR.ko.iqlr.c0.c1.plus,"beneficial")
#genes whose deletions were deleterious
tetR.deleterious.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.deleterious.feautures.c0.c1.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c1.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info
)
#genes whose deletions were beneficial
tetR.beneficial.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.beneficial.feautures.c0.c1.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c1.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info
)
#remove genes with KO designs labeled as beneficial AND deleterious
tetR.deleterious.deletions.c0.c1.plus.final<-setdiff(tetR.deleterious.deletions.c0.c1.plus.high.confidence,tetR.beneficial.deletions.c0.c1.plus.high.confidence)
tetR.beneficial.deletions.c0.c1.plus.final<-setdiff(tetR.beneficial.deletions.c0.c1.plus.high.confidence,tetR.deleterious.deletions.c0.c1.plus.high.confidence)
#############################################################
#process ALDEx2 output for TetR+TET (T2) vs TetR (T0) (tetR.ko.iqlr.c0.c2.plus)
#under-represented (deleterious) and over-represented (beneficial) KO designs
tetR.deleterious.feautures.c0.c2.plus<-select.aldex.designs(tetR.ko.iqlr.c0.c2.plus,"deleterious")
tetR.beneficial.feautures.c0.c2.plus<-select.aldex.designs(tetR.ko.iqlr.c0.c2.plus,"beneficial")
#genes whose deletions were deleterious
tetR.deleterious.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.deleterious.feautures.c0.c2.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c2.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info
)
#genes whose deletions were beneficial
tetR.beneficial.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.beneficial.feautures.c0.c2.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c2.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info
)
#remove genes with mutant designs labeled as beneficial AND deleterious
tetR.deleterious.deletions.c0.c2.plus.final<-setdiff(tetR.deleterious.deletions.c0.c2.plus.high.confidence,tetR.beneficial.deletions.c0.c2.plus.high.confidence)
tetR.beneficial.deletions.c0.c2.plus.final<-setdiff(tetR.beneficial.deletions.c0.c2.plus.high.confidence,tetR.deleterious.deletions.c0.c2.plus.high.confidence)
#consolidate results for the TetR library
#gene deletions with opposite effects at different time points were not considered
tetR.deleterious.aldex.compilation<-setdiff(
  union(tetR.deleterious.deletions.c0.c1.plus.final,tetR.deleterious.deletions.c0.c2.plus.final),
  union(tetR.beneficial.deletions.c0.c1.plus.final,tetR.beneficial.deletions.c0.c2.plus.final)
)
tetR.beneficial.aldex.compilation<-setdiff(
  union(tetR.beneficial.deletions.c0.c1.plus.final,tetR.beneficial.deletions.c0.c2.plus.final),
  union(tetR.deleterious.deletions.c0.c1.plus.final,tetR.deleterious.deletions.c0.c2.plus.final)
)
print(paste("TetR had",length(tetR.deleterious.aldex.compilation),"genes whose deletions were deleterious"))
[1] "TetR had 308 genes whose deletions were deleterious"
print(sprintf("TetR had %d genes whose deletions were beneficial",length(tetR.beneficial.aldex.compilation)))
[1] "TetR had 953 genes whose deletions were beneficial"
#zero KO designs were identified as differentially abundant at T1(-TET) or T2(-TET) vs T0
#############################################################
#helper: KO designs (row names of an ALDEx2 output table) with we.eBH and wi.eBH below qval.threshold and an effect beyond effect.threshold
#direction="deleterious" keeps under-represented designs (effect < -effect.threshold); any other value keeps over-represented designs (effect > effect.threshold)
select.aldex.designs<-function(aldex.table,direction)
{
  passes.qvalue<-aldex.table$we.eBH < qval.threshold & aldex.table$wi.eBH < qval.threshold
  if(direction=="deleterious")
  {
    passes.effect<-aldex.table$effect < -1*effect.threshold
  }
  else
  {
    passes.effect<-aldex.table$effect > effect.threshold
  }
  rownames(aldex.table)[which(passes.qvalue & passes.effect)]
}
#process ALDEx2 output for WT+TET (T1) vs WT (T0) (wt.ko.iqlr.c0.c1.plus)
#under-represented (deleterious) and over-represented (beneficial) KO designs
wt.deleterious.feautures.c0.c1.plus<-select.aldex.designs(wt.ko.iqlr.c0.c1.plus,"deleterious")
wt.beneficial.feautures.c0.c1.plus<-select.aldex.designs(wt.ko.iqlr.c0.c1.plus,"beneficial")
#genes whose deletions were deleterious
wt.deleterious.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.deleterious.feautures.c0.c1.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c1.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info
)
#genes whose deletions were beneficial
wt.beneficial.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.beneficial.feautures.c0.c1.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c1.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info
)
#remove genes with mutant designs identified as both beneficial AND deleterious
wt.deleterious.deletions.c0.c1.plus.final<-setdiff(wt.deleterious.deletions.c0.c1.plus.high.confidence,wt.beneficial.deletions.c0.c1.plus.high.confidence)
wt.beneficial.deletions.c0.c1.plus.final<-setdiff(wt.beneficial.deletions.c0.c1.plus.high.confidence,wt.deleterious.deletions.c0.c1.plus.high.confidence)
#############################################################
#process ALDEx2 output for WT+TET (T2) vs WT (T0) (wt.ko.iqlr.c0.c2.plus)
#under-represented (deleterious) and over-represented (beneficial) KO designs
wt.deleterious.feautures.c0.c2.plus<-select.aldex.designs(wt.ko.iqlr.c0.c2.plus,"deleterious")
wt.beneficial.feautures.c0.c2.plus<-select.aldex.designs(wt.ko.iqlr.c0.c2.plus,"beneficial")
#genes whose deletions were deleterious
wt.deleterious.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.deleterious.feautures.c0.c2.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c2.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info
)
#genes whose deletions were beneficial
wt.beneficial.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.beneficial.feautures.c0.c2.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c2.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info
)
#remove genes with mutant designs identified as both beneficial AND deleterious
wt.deleterious.deletions.c0.c2.plus.final<-setdiff(wt.deleterious.deletions.c0.c2.plus.high.confidence,wt.beneficial.deletions.c0.c2.plus.high.confidence)
wt.beneficial.deletions.c0.c2.plus.final<-setdiff(wt.beneficial.deletions.c0.c2.plus.high.confidence,wt.deleterious.deletions.c0.c2.plus.high.confidence)
#consolidate results for the WT library
#as for the TetR library, gene deletions with opposite effects at different time points are not considered:
#a gene called deleterious at one time point and beneficial at the other is removed from both sets, so it is not counted twice
#and is not silently reported as deleterious downstream (this is a no-op when no gene has opposite calls)
#NOTE(review): the counts printed below may change if such genes exist - re-run and confirm the reported numbers
wt.deleterious.aldex.compilation<-setdiff(
  union(wt.deleterious.deletions.c0.c1.plus.final,wt.deleterious.deletions.c0.c2.plus.final),
  union(wt.beneficial.deletions.c0.c1.plus.final,wt.beneficial.deletions.c0.c2.plus.final)
)
wt.beneficial.aldex.compilation<-setdiff(
  union(wt.beneficial.deletions.c0.c1.plus.final,wt.beneficial.deletions.c0.c2.plus.final),
  union(wt.deleterious.deletions.c0.c1.plus.final,wt.deleterious.deletions.c0.c2.plus.final)
)
print(paste("WT had",length(wt.deleterious.aldex.compilation),"genes whose deletions were deleterious",sep=" "))
[1] "WT had 208 genes whose deletions were deleterious"
print(sprintf("WT had %d genes whose deletions were beneficial",length(wt.beneficial.aldex.compilation)))
[1] "WT had 155 genes whose deletions were beneficial"

3.4 Fig. 3C

#plot profiles of changes in abundance (with respect to T0) for selected genes (one KO design per gene)
selected.genes<-c("atpA","frdC","acrZ","mdtA","uvrA","arcA","phoP","rpoS","cytR")
#corresponding KO designs (in the same order of the selected genes)
selected.designs<-c("6417560","6418918","6423156","6417461","6422556","6405971","6421304","6414543","6417292")
#helper: abundance-difference profile of one KO design at T0, T1 and T2 (T0 is the reference, hence 0)
delta.abundance.profile<-function(c1.table,c2.table,design)
{
  c(0,c1.table[design,"diff.btw"],c2.table[design,"diff.btw"])
}
#3 x 3 grid, one subpanel per gene
par(mfrow=c(3,3))
for(i in seq_along(selected.genes))
{
  current.gene<-selected.genes[i]
  current.design<-selected.designs[i]
  #TetR library: antibiotic-free and tetracycline-treated profiles
  tetR.untreated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.minus,tetR.ko.iqlr.c0.c2.minus,current.design)
  tetR.treated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.plus,tetR.ko.iqlr.c0.c2.plus,current.design)
  #WT library: antibiotic-free and tetracycline-treated profiles
  wt.untreated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.minus,wt.ko.iqlr.c0.c2.minus,current.design)
  wt.treated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.plus,wt.ko.iqlr.c0.c2.plus,current.design)
  #the y axis covers all four profiles (missing values ignored)
  plot(
    x=0:2,y=tetR.untreated.vector,
    type="o",lty=2,col="red",
    ylim=range(c(tetR.untreated.vector,tetR.treated.vector,wt.untreated.vector,wt.treated.vector),na.rm=TRUE),
    main=paste(current.gene,current.design,sep="-"),
    xlab="Cycle",ylab=expression(paste(Delta,"abundance (vs t0)")),
    cex=1.1,cex.lab=1.1,cex.axis=1.1
  )
  points(x=0:2,y=tetR.treated.vector,type="o",col="red4")
  points(x=0:2,y=wt.treated.vector,type="o",col="blue4")
  points(x=0:2,y=wt.untreated.vector,type="o",lty=2,col="blue")
}

3.5 Fig. S4

#create Fig. S4A
#compare genes whose deletion affect fitness on TetR and/or WT backgrounds
#genes associated with deleterious KO deletions: shared, TetR-only and WT-only
conserved.deleterious.genes<-intersect(wt.deleterious.aldex.compilation,tetR.deleterious.aldex.compilation)
tetR.unique.deleterious.genes<-setdiff(tetR.deleterious.aldex.compilation,wt.deleterious.aldex.compilation)
wt.unique.deleterious.genes<-setdiff(wt.deleterious.aldex.compilation,tetR.deleterious.aldex.compilation)
#genes associated with beneficial KO deletions: shared, TetR-only and WT-only
conserved.beneficial.genes<-intersect(wt.beneficial.aldex.compilation,tetR.beneficial.aldex.compilation)
tetR.unique.beneficial.genes<-setdiff(tetR.beneficial.aldex.compilation,wt.beneficial.aldex.compilation)
wt.unique.beneficial.genes<-setdiff(wt.beneficial.aldex.compilation,tetR.beneficial.aldex.compilation)
#compile name of genes that impact fitness in the WT and/or TetR strains during tetracycline treatment
differentially.abundant.gene.compilation<-unique(c(
  conserved.beneficial.genes,wt.unique.beneficial.genes,tetR.unique.beneficial.genes,
  conserved.deleterious.genes,wt.unique.deleterious.genes,tetR.unique.deleterious.genes
))
#create matrix for heatmap (genes x strain), initialized as neutral
#the notation is: +1 = beneficial deletion, -1 = deleterious deletion, 0 = neutral
input.matrix.for.heatmap<-matrix(
  data=0,
  nrow=length(differentially.abundant.gene.compilation),ncol=2,
  dimnames=list(differentially.abundant.gene.compilation,c("WT","TetR"))
)
#fill out input matrix; deleterious is assigned after beneficial, so it wins if a gene is in both sets
#WT background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%wt.beneficial.aldex.compilation,"WT"]<-1
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%wt.deleterious.aldex.compilation,"WT"]<- -1
#TetR background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%tetR.beneficial.aldex.compilation,"TetR"]<-1
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%tetR.deleterious.aldex.compilation,"TetR"]<- -1
#three-color heatmap: the breaks isolate -1, 0 and +1
pheatmap(
  input.matrix.for.heatmap,
  scale="none",
  color=colorRampPalette(rev(brewer.pal(3, "PiYG")) )(3)[3:1],
  breaks=c(-1,-0.0001,0.0001,1),
  legend=TRUE,legend_breaks=c(-1,-0.0001,0.0001,1),legend_labels=c("-1","0","0","+1"),
  cluster_rows=FALSE,cluster_cols=FALSE,
  show_rownames=FALSE,fontsize=5,angle_col=90,
  main="Fig. S4A"
)
#create Fig. S4B
#write CSV files with TetR-specific deleterious and beneficial gene deletions
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_specific_beneficial_genes_082422.csv",cbind(translate.gene.name.to.locus(tetR.unique.beneficial.genes),tetR.unique.beneficial.genes))
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_specific_deleterious_genes_082422.csv",cbind(translate.gene.name.to.locus(tetR.unique.deleterious.genes),tetR.unique.deleterious.genes))
#barplot with gene counts per functional category, hard-coded from the output of the DAVID functional enrichment analysis
par(mfrow=c(1,2))

#empty first panel; the barplot is drawn in the second one
plot.new()
barplot(
  height=c(40,29,13,6,9),
  names.arg=c("Fe/other ion transport","Pilus-cell adhesion","TCA","Enterobactin biosynthesis","Ubiquinone/other terpenoid-quinone biosynthesis"),
  horiz=TRUE,las=2,col=c("#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854"),
  xlab="Number of genes",xlim=c(0,42),main="Fig. S4B",
  cex.lab=1.1,cex.axis=1.1,cex.names=1.1
)

#create Fig. S4C
#heatmap for TFs whose deletion affect fitness of TetR and/or WT
#define names of E. coli TFs
ecoli.tfs<-convert.locus.to.gene.name(tf.names)
#manually update TF names to include gene name synonym used by InscriptaResolver
#lookup table: names = gene names returned above, values = synonyms used by InscriptaResolver
tf.synonyms<-c(
  rpiR="alsR",ycgE="bluR",ycfQ="comR",fruR="cra",
  yfhA="glrR",yjiE="hypT",matA="ecpR",chpR="mazE",
  dgsA="mlc",ygiT="mqsA",yfeT="murR",ycjZ="pgrR",
  ybjK="rcdA",ydcN="sutR",yehT="btsR",yqjI="nfeR"
)
#apply the replacements one by one, in the order listed
for(old.name in names(tf.synonyms))
{
  ecoli.tfs[which(ecoli.tfs==old.name)]<-tf.synonyms[[old.name]]
}
#identify TFs whose deletion affect fitness of TetR and/or WT
differentially.abundant.tfs<-intersect(ecoli.tfs,differentially.abundant.gene.compilation)
#identify KO designs associated with differentially abundant TFs
tf.ko.designs<-rownames(tetR.ko.library.count.matrix.with.gene.info)[which(tetR.ko.library.count.matrix.with.gene.info[,"gene"]%in% differentially.abundant.tfs)]
#create matrix (one row per KO design) with changes in abundance (delta) with respect to T0 for selected TFs during tetracycline treatment
#columns are, in order: WT T1, WT T2, TetR T1, TetR T2; all designs are looked up at once instead of growing the matrix row by row
tf.deletion.delta.abundance.matrix<-cbind(
  wt.ko.iqlr.c0.c1.plus[tf.ko.designs,"diff.btw"],
  wt.ko.iqlr.c0.c2.plus[tf.ko.designs,"diff.btw"],
  tetR.ko.iqlr.c0.c1.plus[tf.ko.designs,"diff.btw"],
  tetR.ko.iqlr.c0.c2.plus[tf.ko.designs,"diff.btw"]
)
#TF name of every design, in the same order as the matrix rows
order.of.tfs.in.matrix<-as.character(unname(tetR.ko.library.count.matrix.with.gene.info[tf.ko.designs,"gene"]))
#rename matrix rows using TF names
rownames(tf.deletion.delta.abundance.matrix)<-order.of.tfs.in.matrix
#re-order matrix rows by TF names (drop=FALSE keeps a matrix even when there is a single row)
tf.deletion.delta.abundance.matrix<-tf.deletion.delta.abundance.matrix[order(rownames(tf.deletion.delta.abundance.matrix)), ,drop=FALSE]
#compute mean difference in abundance per TF across all of its KO designs, ignoring missing values
#(TFs with a single design keep their values; TFs with any number of designs collapse to exactly one row)
tfs.in.matrix<-unique(rownames(tf.deletion.delta.abundance.matrix))
average.tf.deletion.delta.abundance.matrix<-t(vapply(tfs.in.matrix,function(current.tf)
{
  colMeans(tf.deletion.delta.abundance.matrix[rownames(tf.deletion.delta.abundance.matrix)==current.tf, ,drop=FALSE],na.rm=TRUE)
},numeric(4)))
rownames(average.tf.deletion.delta.abundance.matrix)<-tfs.in.matrix
#replace NAs (that occur for those time points in which a gene KOs were not detected) with a "20" value
#(is.na is also TRUE for the NaN that colMeans returns when every design of a TF is missing)
average.tf.deletion.delta.abundance.matrix[which(is.na(average.tf.deletion.delta.abundance.matrix))]<-20
#add column names
colnames(average.tf.deletion.delta.abundance.matrix)<-c("WT.t1","WT.t2","TetR.t1","TetR.t2")
#define heatmap breaks; the last interval (10,20] is drawn in black and holds the "20" placeholder
heatmap.breaks<-c(-10,-4,-2,-1,-0.5,0,0.5,1,2,4,10,20)
#generate heatmap
pheatmap(average.tf.deletion.delta.abundance.matrix,scale="none",color =c(colorRampPalette(rev(brewer.pal(10, "PiYG")) )(10)[10:1],"black"),
         cluster_rows=FALSE,cluster_cols = FALSE,fontsize = 5,angle_col = 90,
         legend=TRUE,breaks =heatmap.breaks,legend_breaks = heatmap.breaks,main="Fig. S4C")

3.6 Fig. S5

#plot profiles of changes in abundance (with respect to T0) for selected genes (one KO design per gene)
selected.genes<-c("menB","frdB","hybB","mdh")
#corresponding KO designs (in the same order of the selected genes)
selected.designs<-c("6419317","6416702","6418292","6419091")
#helper: abundance-difference profile of one KO design at T0, T1 and T2 (T0 is the reference, hence 0)
delta.abundance.profile<-function(c1.table,c2.table,design)
{
  c(0,c1.table[design,"diff.btw"],c2.table[design,"diff.btw"])
}
#2 x 2 grid, one subpanel per gene
par(mfrow=c(2,2))
for(i in seq_along(selected.genes))
{
  current.gene<-selected.genes[i]
  current.design<-selected.designs[i]
  #TetR library: antibiotic-free and tetracycline-treated profiles
  tetR.untreated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.minus,tetR.ko.iqlr.c0.c2.minus,current.design)
  tetR.tetracycline.treated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.plus,tetR.ko.iqlr.c0.c2.plus,current.design)
  #WT library: antibiotic-free and tetracycline-treated profiles
  wt.untreated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.minus,wt.ko.iqlr.c0.c2.minus,current.design)
  wt.tetracycline.treated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.plus,wt.ko.iqlr.c0.c2.plus,current.design)
  #the y axis covers all four profiles (missing values ignored)
  plot(
    x=0:2,y=tetR.untreated.vector,
    type="o",lty=2,col="red",
    ylim=range(c(tetR.untreated.vector,tetR.tetracycline.treated.vector,wt.untreated.vector,wt.tetracycline.treated.vector),na.rm=TRUE),
    main=paste(current.gene,current.design,sep="-"),
    xlab="Cycle",ylab=expression(paste(Delta,"abundance (vs t0)")),
    cex=1.1,cex.lab=1.1,cex.axis=1.1
  )
  points(x=0:2,y=tetR.tetracycline.treated.vector,type="o",col="red4")
  points(x=0:2,y=wt.untreated.vector,type="o",lty=2,col="blue")
  points(x=0:2,y=wt.tetracycline.treated.vector,type="o",col="blue4")
}

3.7 Evaluate overlap between TetR dropouts at T3 and ALDEx2 results

#genes that became undetectable because of tetracycline: dropouts at T3(+) that were not already
#undetectable at the beginning of the experiment (T0) or at the end of the tetracycline-free experiment (T3(-))
genes.depleted.without.tetracycline<-union(tetR.depleted.genes[["T0"]],tetR.depleted.genes[["T3(-)"]])
tetR.depleted.genes.due.to.tetracycline<-setdiff(tetR.depleted.genes[["T3(+)"]],genes.depleted.without.tetracycline)
#overlap with the TetR-specific genes whose deletion is deleterious
overlapping.genes<-intersect(tetR.depleted.genes.due.to.tetracycline,tetR.unique.deleterious.genes)
print(paste("There are",length(overlapping.genes),"genes in common"))
[1] "There are 55 genes in common"
#hypergeometric test for the overlap computed above
#gene universe: genes targeted by the KO designs present in the ALDEx2 comparisons T1(+TET) vs T0 and T2(+TET) vs T0,
#after discarding the two control KO designs that are not associated with any gene
aldex2.designs.included.in.comparisons<-setdiff(
  union(rownames(tetR.ko.iqlr.c0.c1.plus),rownames(tetR.ko.iqlr.c0.c2.plus)),
  c("500955653","500955654"))
gene.universe.for.test<-unique(tetR.ko.library.count.matrix.with.gene.info[aldex2.designs.included.in.comparisons,"gene"])
#parameters of the test (names follow the arguments of phyper)
#q: size of the observed overlap
q<-length(overlapping.genes)
#m: genes of the universe depleted due to tetracycline
m<-length(intersect(gene.universe.for.test,tetR.depleted.genes.due.to.tetracycline))
#n: remaining genes of the universe
n<-length(gene.universe.for.test)-m
#k: TetR-specific deleterious genes that belong to the universe
k<-length(intersect(tetR.unique.deleterious.genes,gene.universe.for.test))
#upper tail evaluated at q-1, ie probability of an overlap of at least q genes
pvalue.overlap<-phyper(q=q-1,m=m,n=n,k=k,lower.tail=FALSE)
print(paste("Overlap P-value:",round(pvalue.overlap,digits=3)))
[1] "Overlap P-value: 0.012"

3.8 Create individual tables for Data Set S2 (with information about undetectable genes)

#alternative function to convert gene names into the corresponding locus tags
#this function uses an E. coli genome annotation table with (at least) the columns "Symbol" and "Aliases";
#the locus tag is taken to be the first comma-separated entry of "Aliases"
#arguments:
#  geneNames: vector with gene names (standard symbols or synonyms)
#  genome.file: path of the CSV file with the genome annotation (defaults to the file used in this analysis)
#value: character vector with one element per input name (same order). Each element is the locus tag, or the
#  original name when the name was not found or could not be assigned to a single annotation row
convert.gene.name.to.locus.tag<-function(geneNames,genome.file="../Data/Miscellaneous_files/ecoli_gene_ids.csv")
{
  geneNames<-as.character(geneNames)
  #read E. coli genome information
  ecoli.genome<-read.csv(genome.file,header=TRUE,stringsAsFactors=FALSE)
  gene.symbols<-as.character(ecoli.genome$Symbol)
  gene.aliases<-as.character(ecoli.genome$Aliases)
  #by default every name is kept as it is; entries are overwritten only when a unique locus is found
  #this keeps the output aligned with the input (callers bind both vectors side by side)
  output<-geneNames
  for(i in seq_along(geneNames))
  {
    g<-geneNames[i]
    #first, evaluate if the current name is the standard gene symbol
    current.gene.position<-which(gene.symbols == g)
    if(length(current.gene.position)==0)
    {
      #second, evaluate if the current name is a synonym
      #NOTE(review): grep treats the name as a regular expression and matches substrings of "Aliases";
      #names contained in longer aliases may therefore look ambiguous -- confirm this is intended
      current.gene.position<-grep(g,gene.aliases)
    }
    #extract the locus id only when exactly one annotation row was identified
    #(names matching several rows, either as symbol or as synonym, keep the original gene name)
    if(length(current.gene.position)==1)
    {
      output[i]<-strsplit(gene.aliases[current.gene.position],split = ",")[[1]][1]
    }
  }
  output
}
#export one CSV file per entry of tetR.depleted.genes (the first seven entries are used)
for(i in seq_len(7))
{
  #two-column table: name of each depleted gene and its locus tag
  current.depleted.genes<-tetR.depleted.genes[[i]]
  temporal.table<-cbind(current.depleted.genes,convert.gene.name.to.locus.tag(current.depleted.genes))
  colnames(temporal.table)<-c("Gene Name","Locus")
  current.output.file<-paste0("../../Supplement/Dataset S2/TetR_",names(tetR.depleted.genes)[i],"_depleted_genes.csv")
  write.csv(temporal.table,file=current.output.file,quote=FALSE,row.names=FALSE)
}
#export a 7x7 table with the number of depleted genes shared by every pair of time points
#(the diagonal holds the number of unique depleted genes of each time point)
tetR.time.point.labels<-names(tetR.depleted.genes)
tetR.depleted.genes.comparison.table<-matrix(0,nrow=7,ncol=7,dimnames=list(tetR.time.point.labels,tetR.time.point.labels))
tetR.depleted.genes.comparison.table[]<-sapply(seq_len(7),function(column.position)
{
  sapply(seq_len(7),function(row.position)
  {
    length(intersect(tetR.depleted.genes[[row.position]],tetR.depleted.genes[[column.position]]))
  })
})
write.csv(tetR.depleted.genes.comparison.table,file="../../Supplement/Dataset S2/TetR_gene_depletion_comparison.csv")
#export one CSV file per entry of wt.depleted.genes (the first seven entries are used)
for(i in seq_len(7))
{
  #two-column table: name of each depleted gene and its locus tag
  current.depleted.genes<-wt.depleted.genes[[i]]
  temporal.table<-cbind(current.depleted.genes,convert.gene.name.to.locus.tag(current.depleted.genes))
  colnames(temporal.table)<-c("Gene Name","Locus")
  current.output.file<-paste0("../../Supplement/Dataset S2/WT_",names(wt.depleted.genes)[i],"_depleted_genes.csv")
  write.csv(temporal.table,file=current.output.file,quote=FALSE,row.names=FALSE)
}
#export a 7x7 table with the number of depleted genes shared by every pair of time points
#(the diagonal holds the number of unique depleted genes of each time point)
wt.time.point.labels<-names(wt.depleted.genes)
wt.depleted.genes.comparison.table<-matrix(0,nrow=7,ncol=7,dimnames=list(wt.time.point.labels,wt.time.point.labels))
wt.depleted.genes.comparison.table[]<-sapply(seq_len(7),function(column.position)
{
  sapply(seq_len(7),function(row.position)
  {
    length(intersect(wt.depleted.genes[[row.position]],wt.depleted.genes[[column.position]]))
  })
})
write.csv(wt.depleted.genes.comparison.table,file="../../Supplement/Dataset S2/WT_gene_depletion_comparison.csv")

3.9 Create single files for Data Set S1

#raw read counts of the KO designs: one CSV file per library
#each table holds the design ID, the gene targeted by the design and the read counts of every sample
#TetR KO library
tetR.design.ids<-rownames(tetR.ko.read.counts.numeric)
temporal.table<-cbind(tetR.design.ids,
                      tetR.ko.library.count.matrix.with.gene.info[tetR.design.ids,"gene"],
                      tetR.ko.read.counts.numeric)
#only the first six column names are replaced; the remaining ones keep the names of the count matrix
colnames(temporal.table)[1:6]<-c("Design ID","Gene","C0-a","C0-b","C0-c","C0-d")
write.csv(temporal.table,file="../../Supplement/Dataset S1/TetR_raw_read_counts.csv",quote=FALSE,row.names=FALSE)
#WT KO library
wt.design.ids<-rownames(wt.ko.read.counts.numeric)
temporal.table<-cbind(wt.design.ids,
                      wt.ko.library.count.matrix.with.gene.info[wt.design.ids,"gene"],
                      wt.ko.read.counts.numeric)
colnames(temporal.table)[1:6]<-c("Design ID","Gene","C0-a","C0-b","C0-c","C0-d")
write.csv(temporal.table,file="../../Supplement/Dataset S1/WT_raw_read_counts.csv",quote=FALSE,row.names=FALSE)
#ALDEx2 output: one CSV file per comparison, named <library>_<time point><+/-tet>_vs_t0.csv
#files are written in the order listed here (TetR KO library first, then WT KO library)
aldex2.tables.to.export<-list("TetR_t1+tet_vs_t0"=tetR.ko.iqlr.c0.c1.plus,
                              "TetR_t1-tet_vs_t0"=tetR.ko.iqlr.c0.c1.minus,
                              "TetR_t2+tet_vs_t0"=tetR.ko.iqlr.c0.c2.plus,
                              "TetR_t2-tet_vs_t0"=tetR.ko.iqlr.c0.c2.minus,
                              "WT_t1+tet_vs_t0"=wt.ko.iqlr.c0.c1.plus,
                              "WT_t1-tet_vs_t0"=wt.ko.iqlr.c0.c1.minus,
                              "WT_t2+tet_vs_t0"=wt.ko.iqlr.c0.c2.plus,
                              "WT_t2-tet_vs_t0"=wt.ko.iqlr.c0.c2.minus)
for(current.comparison in names(aldex2.tables.to.export))
{
  write.csv(aldex2.tables.to.export[[current.comparison]],
            file=paste0("../../Supplement/Dataset S1/",current.comparison,".csv"))
}
#save CSV file with information of all genes affecting fitness in the TetR KO library
#this leverages the previously constructed matrix that compiled the effect of gene deletions
#(the code below treats 1 as a positive effect, -1 as a negative effect and 0 as no effect)
#rows (genes) with a non-zero effect in the TetR KO library
tetR.rows.impacting.fitness<-which(input.matrix.for.heatmap[,"TetR"]!=0)
tetR.genes.impacting.fitness<-rownames(input.matrix.for.heatmap)[tetR.rows.impacting.fitness]
tetR.gene.KO.fitness.effect<-input.matrix.for.heatmap[tetR.rows.impacting.fitness,"TetR"]
#gene names are assigned explicitly because extracting a single row would drop them
names(tetR.gene.KO.fitness.effect)<-tetR.genes.impacting.fitness
#label the direction of the effect; the labels are computed element-wise so that they always stay aligned
#with the genes (an unexpected value yields NA instead of shifting the remaining labels)
tetR.deletion.effect<-as.character(unname(ifelse(tetR.gene.KO.fitness.effect==1,"Positive",
                                          ifelse(tetR.gene.KO.fitness.effect==-1,"Negative",NA_character_))))
#a gene is TetR-specific when its deletion has no effect in the WT KO library
tetR.specificity<-as.character(unname(ifelse(input.matrix.for.heatmap[tetR.rows.impacting.fitness,"WT"]==0,"Yes","No")))
temporal.table<-cbind(tetR.genes.impacting.fitness,tetR.deletion.effect,tetR.specificity)
colnames(temporal.table)<-c("Gene","Deletion effect","TetR-specific?")
#sort by gene name; drop=FALSE keeps the table two-dimensional when it has a single row
temporal.table<-temporal.table[order(temporal.table[,"Gene"]),,drop=FALSE]
write.csv(file="../../Supplement/Dataset S1/TetR_genes_impacting_fitness.csv",temporal.table,quote = FALSE,row.names = FALSE)
#save CSV file with information of all genes affecting fitness in the WT KO library
#(the code below treats 1 as a positive effect, -1 as a negative effect and 0 as no effect)
#rows (genes) with a non-zero effect in the WT KO library
wt.rows.impacting.fitness<-which(input.matrix.for.heatmap[,"WT"]!=0)
wt.genes.impacting.fitness<-rownames(input.matrix.for.heatmap)[wt.rows.impacting.fitness]
wt.gene.KO.fitness.effect<-input.matrix.for.heatmap[wt.rows.impacting.fitness,"WT"]
#gene names are assigned explicitly because extracting a single row would drop them
names(wt.gene.KO.fitness.effect)<-wt.genes.impacting.fitness
#label the direction of the effect; the labels are computed element-wise so that they always stay aligned
#with the genes (an unexpected value yields NA instead of shifting the remaining labels)
wt.deletion.effect<-as.character(unname(ifelse(wt.gene.KO.fitness.effect==1,"Positive",
                                        ifelse(wt.gene.KO.fitness.effect==-1,"Negative",NA_character_))))
#a gene is WT-specific when its deletion has no effect in the TetR KO library
wt.specificity<-as.character(unname(ifelse(input.matrix.for.heatmap[wt.rows.impacting.fitness,"TetR"]==0,"Yes","No")))
temporal.table<-cbind(wt.genes.impacting.fitness,wt.deletion.effect,wt.specificity)
colnames(temporal.table)<-c("Gene","Deletion effect","WT-specific?")
#sort by gene name; drop=FALSE keeps the table two-dimensional when it has a single row
temporal.table<-temporal.table[order(temporal.table[,"Gene"]),,drop=FALSE]
write.csv(file="../../Supplement/Dataset S1/WT_genes_impacting_fitness.csv",temporal.table,quote = FALSE,row.names = FALSE)

4.1 Fig. 4A

#function to change format of time data so it can be read by Growthcurver
#argument:
#  time.vector: vector of time stamps in the HH:MM:SS format
#value: numeric vector (same length and order as the input) with the time in decimal hours,
#  rounded to three decimals; an empty input yields an empty numeric vector
change.time.format<-function(time.vector)
{
  #nothing to convert
  if(length(time.vector)==0)
  {
    return(numeric(0))
  }
  #split every time stamp into its hour, minute and second fields
  time.fields<-strsplit(as.character(time.vector),split = ":")
  #hours + (seconds + 60*minutes)/3600
  decimal.hours<-vapply(time.fields,function(current.time.point)
  {
    as.numeric(current.time.point[1]) + ((as.numeric(current.time.point[3]) + (60*as.numeric(current.time.point[2])))/3600)
  },numeric(1))
  round(decimal.hours,digits = 3)
}
#function to estimate the average and standard deviation (sd) of each strain (taking into account all of its replicates) at each time point
#argument:
#  growth.df: data frame with three columns: "Time","Strain" and "OD". The latter is the OD readings collected in the Bioscreen experiment
#value: data frame with one row per time point/strain combination and the columns "Time" (numeric),
#  "Strain" (character), "OD" (mean OD reading) and "sd" (standard deviation of the OD readings)
#  rows are sorted by time point (in order of appearance) and, within each time point, by strain (in order of appearance)
#  sd is NA for combinations with a single reading; an input without rows yields a data frame without rows
compute.mean.and.sd<-function(growth.df)
{
  measured.time.points<-as.vector(unique(growth.df$Time))
  measured.strains<-as.character(unique(growth.df$Strain))
  #all time point/strain combinations; expand.grid varies its first argument fastest,
  #so strains change within each time point
  combinations<-expand.grid(Strain=measured.strains,Time=measured.time.points,stringsAsFactors = FALSE)
  #results are stored in numeric vectors allocated up front (values are never converted to text)
  average.per.combination<-numeric(nrow(combinations))
  sd.per.combination<-numeric(nrow(combinations))
  for(i in seq_len(nrow(combinations)))
  {
    #OD readings of all replicates of the current strain at the current time point
    current.readings<-growth.df$OD[which(growth.df$Strain==combinations$Strain[i] & growth.df$Time==combinations$Time[i])]
    average.per.combination[i]<-mean(current.readings)
    sd.per.combination[i]<-sd(current.readings)
  }
  #make sure data frame format is good for downstream analyses
  data.frame(Time=as.numeric(as.vector(combinations$Time)),
             Strain=combinations$Strain,
             OD=average.per.combination,
             sd=sd.per.combination,
             stringsAsFactors = FALSE)
}
#load the OD readings of the Bioscreen experiment used to draw the growth curves
#the experiment included five strains and lasted 48h
#every strain/tetracycline concentration has three biological replicates (each one with two replicates)
#the first column of the file holds the time stamps and is used as row names
growth.data<-read.csv("../Data/Fitness_BioscreenC/20220504_formatted_data.csv",row.names=1)
#time stamps converted to decimal hours
time.data<-change.time.format(rownames(growth.data))
#rows are renamed with the converted time (two decimals)
rownames(growth.data)<-round(time.data,digits = 2)
#baseline correction: the minimum OD reading of each well is subtracted from all readings of that well
growth.data.normalized<-do.call(cbind,lapply(seq_len(ncol(growth.data)),function(current.well)
{
  growth.data[,current.well]-min(growth.data[,current.well])
}))
#wells (columns) and time points (rows) keep the labels of the raw data
colnames(growth.data.normalized)<-colnames(growth.data)
rownames(growth.data.normalized)<-rownames(growth.data)
#tetracycline concentrations included in the experiment (ug/ml)
tetracycline.concentrations<-seq(0,24,by=4)
#labels of the five strains in the dataset
strains<-c("WT","WTarcA","TetR","TetRarcA","pRB3arcA")
#color assigned to each strain in the figures
strain.colors<-c(WT="#66C2A5",WTarcA="#FC8D62",TetR="#8DA0CB",TetRarcA="#E78AC3",pRB3arcA="#A6D854")
#well names encode the strain and the tetracycline concentration
#for example: TetR_20 means that the well was inoculated with the TetR strain and 20 ug/ml of tetracycline
#create Fig. 4A left panel (ie, antibiotic-free cultures)
#stack the OD readings of the antibiotic-free replicates of all five strains
antibiotic.free.cultures.df<-do.call(rbind,lapply(strains,function(current.strain)
{
  #wells of the current strain without tetracycline (well names are <strain>_<concentration>)
  replicates.positions<-grep(paste(current.strain,0,sep="_"),colnames(growth.data.normalized))
  #one block of rows per replicate with time, OD reading and strain; only the first 50 readings (first 24h) are kept
  do.call(rbind,lapply(replicates.positions,function(current.replicate)
  {
    cbind(rownames(growth.data.normalized)[1:50],growth.data.normalized[1:50,current.replicate],rep(current.strain,50))
  }))
}))
colnames(antibiotic.free.cultures.df)<-c("Time","OD","Strain")
#the stacked matrix is text; convert it to a data frame with a categorical strain and numeric time and OD
antibiotic.free.cultures.df<-transform(as.data.frame(antibiotic.free.cultures.df),
                                       Strain=as.factor(Strain),
                                       Time=as.numeric(as.vector(Time)),
                                       OD=as.numeric(as.vector(OD)))
#mean and sd of the replicates of each strain at each time point
antibiotic.free.cultures.df.v2 <- compute.mean.and.sd(antibiotic.free.cultures.df)
#ggplot object of the subpanel: mean growth curve per strain with error bars of +/- one sd
antibiotic.free.subpanel<- ggplot(antibiotic.free.cultures.df.v2, aes(x=Time,y=OD, group=Strain,colour=Strain)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin=OD-sd, ymax=OD+sd), width=0.75,alpha=0.75) +
  theme_classic()
#create Fig. 4A right panel: cultures treated with 20 ug/ml of tetracycline
tet.concentration<- 20
#only TetR, TetR DarcA and TetR DarcA + pRB3-arcA (episomal complemented strain) were treated with tetracycline
#stack the OD readings of the treated replicates of these three strains
tetracycline.treated.cultures.df<-do.call(rbind,lapply(strains[3:5],function(current.strain)
{
  #wells of the current strain at the selected concentration (well names are <strain>_<concentration>)
  replicates.positions<-grep(paste(current.strain,tet.concentration,sep="_"),colnames(growth.data))
  #one block of rows per replicate with time, OD reading and strain; all readings (full 48h) are kept
  do.call(rbind,lapply(replicates.positions,function(current.replicate)
  {
    cbind(rownames(growth.data.normalized),growth.data.normalized[,current.replicate],rep(current.strain,nrow(growth.data.normalized)))
  }))
}))
colnames(tetracycline.treated.cultures.df)<-c("Time","OD","Strain")
#the stacked matrix is text; convert it to a data frame with a categorical strain and numeric time and OD
tetracycline.treated.cultures.df<-transform(as.data.frame(tetracycline.treated.cultures.df),
                                            Strain=as.factor(Strain),
                                            Time=as.numeric(as.vector(Time)),
                                            OD=as.numeric(as.vector(OD)))
#mean and sd of the replicates of each strain at each time point
tetracycline.treated.cultures.df.v2 <- compute.mean.and.sd(tetracycline.treated.cultures.df)
#ggplot object of the subpanel: mean growth curve per strain with (faint) error bars of +/- one sd
tetracycline.treated.subpanel<- ggplot(tetracycline.treated.cultures.df.v2, aes(x=Time,y=OD, group=Strain,colour=Strain)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin=OD-sd, ymax=OD+sd), width=0.75,alpha=0.15) +
  theme_classic()
  #generate Fig. 4A
#text settings shared by both subpanels
fig4a.text.theme<-theme(axis.text=element_text(size=10),axis.title = element_text(size=10,face="bold"))
#left subpanel: all five strains without tetracycline
fig4a.left.subpanel<-antibiotic.free.subpanel +
  scale_color_manual(values=strain.colors) +
  fig4a.text.theme +
  xlab("Time (h)") + ylab("OD600") + ggtitle("[TET]=0")
#right subpanel: the three tetracycline-treated strains (3rd to 5th strain colors)
fig4a.right.subpanel<-tetracycline.treated.subpanel +
  scale_color_manual(values=strain.colors[3:5]) +
  fig4a.text.theme +
  xlab("Time (h)") + ylab("OD600") + ggtitle("[TET]=20ug/ml")
#both subpanels side by side
grid.arrange(fig4a.left.subpanel,fig4a.right.subpanel,nrow=1)

4.2 Fig. S3

#Fig. S3: growth curves of WT and TetR in antibiotic-free cultures
#rows of the per-strain summary (mean and sd at each time point) that belong to the two strains of interest
wt.and.tetR.rows<-antibiotic.free.cultures.df.v2$Strain %in% c("WT","TetR")
#ggplot object: mean growth curve per strain with error bars of +/- one sd
temporal.plot<- ggplot(antibiotic.free.cultures.df.v2[wt.and.tetR.rows,],
                       aes(x=Time,y=OD, group=Strain,colour=Strain)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin=OD-sd, ymax=OD+sd), width=0.75,alpha=0.5) +
  theme_classic()
#WT and TetR use the 1st and 3rd strain colors
fig.s3.text.theme<-theme(axis.text=element_text(size=10),axis.title = element_text(size=10,face="bold"))
print(temporal.plot +
        scale_color_manual(values=strain.colors[c(1,3)]) +
        fig.s3.text.theme +
        xlab("Time (h)") + ylab("OD600") + ggtitle("Fig. S3"))

#estimate fitness parameters shown in Fig. S3 with Growthcurver (for the first 24h of growth)
#Growthcurver input: a data frame whose first column ("time") holds the time in decimal hours and
#whose remaining columns hold the normalized OD readings of each well; only the first 50 readings are used
#NOTE: the lines starting with "[1]" below are the recorded console output of the preceding print() call
time.vector<-as.vector(time.data)
input.matrix.growthcurver<-as.data.frame(cbind(time.data[1:50],growth.data.normalized[1:50,]))
#rename column with time values
colnames(input.matrix.growthcurver)[1]<-"time"
#run Growthcurver (one row of estimates per well; wells are identified in the "sample" column)
growthcurver.output <- SummarizeGrowthByPlate(input.matrix.growthcurver)
#print estimated max growth rate (mu) for WT and TetR in antibiotic-free medium
#values are the mean of the "r" estimates of all wells whose name contains "WT_0" or "TetR_0", respectively
print(paste("mu (WT in LB without tetracycline) was:",round(mean(growthcurver.output[grep("WT_0",growthcurver.output$sample),"r"]),digits=2),sep=""))
[1] "mu (WT in LB without tetracycline) was:1.15"
print(paste("mu (TetR in LB without tetracycline) was:",round(mean(growthcurver.output[grep("TetR_0",growthcurver.output$sample),"r"]),digits=2),sep=""))
[1] "mu (TetR in LB without tetracycline) was:0.8"
#print estimated area under the growth curve (AUC) values for WT and TetR in antibiotic-free medium
#values are the mean of the "auc_e" estimates of the same wells
print(paste("AUC (WT in LB without tetracycline) was:",round(mean(growthcurver.output[grep("WT_0",growthcurver.output$sample),"auc_e"]),digits=2),sep=""))
[1] "AUC (WT in LB without tetracycline) was:30.29"
print(paste("AUC (TetR in LB without tetracycline) was:",round(mean(growthcurver.output[grep("TetR_0",growthcurver.output$sample),"auc_e"]),digits=2),sep=""))
[1] "AUC (TetR in LB without tetracycline) was:20.44"

4.3 Fig. 4B-C

#load the OD readings of the experiment that evaluated the effect of arcA deletion in WT and TetR over three growth cycles
#in each cycle, cultures were started at an OD600 of 0.1 and grown to an OD600 of 1.0
#and thereafter diluted in fresh medium to OD600 of 0.1 to start a new cycle of growth
#three replicates per strain/antibiotic concentration were included
#last two columns are LB controls
cyclic.growth.od<-read.csv("../Data/cyclic_growth/macrodilution_cyclic_growth_experiment_OD_data.csv",header=TRUE)
#time stamps converted to decimal hours as before
cyclic.growth.od$Time<-change.time.format(cyclic.growth.od$Time)
#layout: tetracycline-free panel, tetracycline-treated panel and a panel that only holds the legend
par(mfrow=c(1,3))
#settings of the two panels with growth curves: concentration label used in the column names, title and x-axis range
fig4bc.concentrations<-c("0","20")
fig4bc.titles<-c("[TET]=0","[TET]=20 ug/ml")
fig4bc.time.ranges<-list(c(0,10),c(0,40))
#plotting symbol of each replicate
fig4bc.replicate.symbols<-c(A=0,B=1,C=2)
for(fig4bc.panel in seq_along(fig4bc.concentrations))
{
  #the first curve of a panel opens the plot; the following curves are added on top of it
  fig4bc.panel.is.empty<-TRUE
  #individual growth curves of TetR first, then of TetR DarcA (columns are named <strain>_<concentration>_<replicate>)
  for(fig4bc.strain in c("TetR","TetRarcA"))
  {
    for(fig4bc.replicate in names(fig4bc.replicate.symbols))
    {
      fig4bc.readings<-cyclic.growth.od[[paste(fig4bc.strain,fig4bc.concentrations[fig4bc.panel],fig4bc.replicate,sep="_")]]
      #time points at which the current culture was measured
      fig4bc.measured<-which(fig4bc.readings!="")
      if(fig4bc.panel.is.empty)
      {
        plot(x=cyclic.growth.od$Time[fig4bc.measured],y=fig4bc.readings[fig4bc.measured],
             col=strain.colors[fig4bc.strain],pch=fig4bc.replicate.symbols[[fig4bc.replicate]],type="o",
             xlab="Time (h)",ylab="OD600",xlim=fig4bc.time.ranges[[fig4bc.panel]],ylim=c(0,1.1),
             cex.axis=1.5,cex.lab=1.5,main=fig4bc.titles[fig4bc.panel])
        fig4bc.panel.is.empty<-FALSE
      }
      else
      {
        points(x=cyclic.growth.od$Time[fig4bc.measured],y=fig4bc.readings[fig4bc.measured],
               col=strain.colors[fig4bc.strain],pch=fig4bc.replicate.symbols[[fig4bc.replicate]],type="o")
      }
    }
  }
  #reference line at OD600 = 1
  abline(h=1,col="darkgrey",lty=2)
}
#add legend in the third panel
plot.new()
legend("center",col=strain.colors[c("TetR","TetRarcA")],legend = c("TetR",expression(paste("TetR ",Delta, "arcA"))),lty=1)

4.4 Fig. S6

#Fig. S6: individual growth curves of WT and WT DarcA over the growth cycles
#layout: tetracycline-free panel, tetracycline-treated panel and a panel that only holds the legend
par(mfrow=c(1,3))
#settings of the two panels with growth curves: concentration label used in the column names, title and x-axis range
figs6.concentrations<-c("0","0.75")
figs6.titles<-c("[TET]=0","[TET]=0.75 ug/ml")
figs6.time.ranges<-list(c(0,7),c(0,30))
#plotting symbol of each replicate
figs6.replicate.symbols<-c(A=0,B=1,C=2)
for(figs6.panel in seq_along(figs6.concentrations))
{
  #the first curve of a panel opens the plot; the following curves are added on top of it
  figs6.panel.is.empty<-TRUE
  #individual growth curves of WT first, then of WT DarcA (columns are named <strain>_<concentration>_<replicate>)
  for(figs6.strain in c("WT","WTarcA"))
  {
    for(figs6.replicate in names(figs6.replicate.symbols))
    {
      figs6.readings<-cyclic.growth.od[[paste(figs6.strain,figs6.concentrations[figs6.panel],figs6.replicate,sep="_")]]
      #time points at which the current culture was measured
      figs6.measured<-which(figs6.readings!="")
      if(figs6.panel.is.empty)
      {
        plot(x=cyclic.growth.od$Time[figs6.measured],y=figs6.readings[figs6.measured],
             col=strain.colors[figs6.strain],pch=figs6.replicate.symbols[[figs6.replicate]],type="o",
             xlab="Time (h)",ylab="OD600",xlim=figs6.time.ranges[[figs6.panel]],ylim=c(0,1.1),
             cex.axis=1.5,cex.lab=1.5,main=figs6.titles[figs6.panel])
        figs6.panel.is.empty<-FALSE
      }
      else
      {
        points(x=cyclic.growth.od$Time[figs6.measured],y=figs6.readings[figs6.measured],
               col=strain.colors[figs6.strain],pch=figs6.replicate.symbols[[figs6.replicate]],type="o")
      }
    }
  }
  #reference line at OD600 = 1
  abline(h=1,col="darkgrey",lty=2)
}
#add legend in the third panel
plot.new()
legend("center",col=strain.colors[c("WT","WTarcA")],legend = c("WT",expression(paste("WT ",Delta, "arcA"))),lty=1)

4.5 Fig. 4D

#run Growthcurver analysis again
input.matrix.growthcurver<-as.data.frame(cbind(time.data,growth.data.normalized))
#rename column with time information
colnames(input.matrix.growthcurver)[1]<-"time"
#run Growthcurver for the full 48h
growthcurver.output <- SummarizeGrowthByPlate(input.matrix.growthcurver)
#create Fig. 4D (left panel)
#data frame with Growthcurver-estimated AUCs of TetR, TetR DarcA and TetR DarcA + pRB3-arcA (episomal complemented strain)
auc.df<-c()
#extract AUC values of TetR from Growthcurver output
tetR.samples.positions<-grep("TetR_",growthcurver.output[,"sample"])
tetR.auc<-growthcurver.output[tetR.samples.positions,"auc_e"]
names(tetR.auc)<-growthcurver.output[tetR.samples.positions,"sample"]
#compute mean and SD for AUC values for each tetracycline concentration
temporal.auc.matrix<-cbind(sapply(1:7,function(x){mean(tetR.auc[((x-1)*6)+1:6])}),
                          sapply(1:7,function(x){sd(tetR.auc[((x-1)*6)+1:6])}))
#add values to data frame 
auc.df<-rbind(auc.df,temporal.auc.matrix)
#extract AUC values of TetR DarcA from Growthcurver output
tetR.arcA.samples.positions<-grep("TetRarcA_",growthcurver.output[,"sample"])
tetR.arcA.auc<-growthcurver.output[tetR.arcA.samples.positions,"auc_e"]
names(tetR.arcA.auc)<-growthcurver.output[tetR.arcA.samples.positions,"sample"]
#compute mean and SD for AUC values for each tetracycline concentration
temporal.auc.matrix<-cbind(sapply(1:7,function(x){mean(tetR.arcA.auc[((x-1)*6)+1:6])}),
                   sapply(1:7,function(x){sd(tetR.arcA.auc[((x-1)*6)+1:6])}))
#add values to data frame 
auc.df<-rbind(auc.df,temporal.auc.matrix)
#extract AUC values of TetR DarcA + pRB3-arcA from Growthcurver output
tetR.arcA.pRB3.samples.positions<-grep("pRB3arcA_",growthcurver.output[,"sample"])
tetR.arcA.pRB3.auc<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"auc_e"]
names(tetR.arcA.pRB3.auc)<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"sample"]
#compute mean and SD for AUC values for each tetracycline concentration
temporal.auc.matrix<-cbind(sapply(1:7,function(x){mean(tetR.arcA.pRB3.auc[((x-1)*6)+1:6])}),
                   sapply(1:7,function(x){sd(tetR.arcA.pRB3.auc[((x-1)*6)+1:6])}))
#add values to data frame 
auc.df<-rbind(auc.df,temporal.auc.matrix)
#make sure data frame format is correct
auc.tet.concentrations<-factor(rep(tetracycline.concentrations,3),levels = seq(0,24,by=4))
auc.strains<-factor(rep(c("TetR","TetRarcA","pRB3arcA"),each=7),levels = c("TetR","TetRarcA","pRB3arcA"))
#add columns with tetracycline concentration and strain information
auc.df<-cbind(as.data.frame(auc.df),auc.tet.concentrations,auc.strains)
colnames(auc.df)<-c("auc","sd","Concentration","Strain")
#create ggplot object
plot.auc.estimates<- ggplot(auc.df, aes(x=Concentration,y=auc, group=Strain,colour=Strain)) +
  geom_line() +
  geom_point()+
  geom_errorbar(aes(ymin=auc-sd, ymax=auc+sd), width=0.75) + theme_classic() +theme(aspect.ratio = 1)
#similar analysis for lag phase proxy (Fig. 4D right panel)
#point of inflection (ie, time point for 1/2 max OD) was used as a proxy for lag phase
#data frame with Growthcurver-estimated lag phase proxy of TetR, TetR DarcA and TetR DarcA + pRB3-arcA (episomal complemented strain)
lag.df<-c()
#extract point of inflection values of TetR from Growthcurver output
tetR.lag<-growthcurver.output[tetR.samples.positions,"t_mid"]
names(tetR.lag)<-growthcurver.output[tetR.samples.positions,"sample"]
#compute mean and SD for inflection point values for each tetracycline concentration
temporal.lag.matrix<-cbind(sapply(1:7,function(x){mean(tetR.lag[((x-1)*6)+1:6])}),
                   sapply(1:7,function(x){sd(tetR.lag[((x-1)*6)+1:6])}))
#add values to data frame 
lag.df<-rbind(lag.df,temporal.lag.matrix)
#extract point of inflection values of TetR DarcA from Growthcurver output
tetR.arcA.lag<-growthcurver.output[tetR.arcA.samples.positions,"t_mid"]
names(tetR.arcA.lag)<-growthcurver.output[tetR.arcA.samples.positions,"sample"]
#compute mean and SD for inflection point values for each tetracycline concentration
temporal.lag.matrix<-cbind(sapply(1:7,function(x){mean(tetR.arcA.lag[((x-1)*6)+1:6])}),
                   sapply(1:7,function(x){sd(tetR.arcA.lag[((x-1)*6)+1:6])}))
#add values to data frame 
lag.df<-rbind(lag.df,temporal.lag.matrix)
#extract point of inflection values of TetR DarcA + pRB3-arcA from Growthcurver output
tetR.arcA.pRB3.lag<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"t_mid"]
temporal.lag.matrix<-cbind(sapply(1:7,function(x){mean(tetR.arcA.pRB3.lag[((x-1)*6)+1:6])}),
                   sapply(1:7,function(x){sd(tetR.arcA.pRB3.lag[((x-1)*6)+1:6])}))
#add values to data frame 
lag.df<-rbind(lag.df,temporal.lag.matrix)
#add columns with tetracycline concentration and strain information
lag.df<-cbind(as.data.frame(lag.df),auc.tet.concentrations,auc.strains)
colnames(lag.df)<-c("lag","sd","Concentration","Strain")
#if estimated point of inflection was > 48h, then the value was set to 48
lag.df$sd[which(lag.df$lag>48)]<-0
lag.df$lag[which(lag.df$lag>48)]<-48
#create ggplot object (mean lag proxy per concentration, one line per strain, error bars = +/- SD)
plot.lag.estimates<-ggplot(lag.df,aes(x=Concentration,y=lag,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=lag-sd,ymax=lag+sd),width=0.75)+
  theme_classic()+
  theme(aspect.ratio=1)
#create Fig. 4D
#layers shared by both panels (colors, text size, panel border and x axis label)
fig4d.shared.layers<-list(scale_color_manual(values=strain.colors[3:5]),
                          theme(axis.text=element_text(size=10),axis.title=element_text(size=10),
                                panel.border=element_rect(colour="black",fill=NA,size=0.5)),
                          xlab("[TET](ug/ml)"))
#left panel: AUC estimates; right panel: lag phase proxy
grid.arrange(plot.auc.estimates+fig4d.shared.layers+ylab("AUC")+ggtitle("Fig. 4D"),
             plot.lag.estimates+fig4d.shared.layers+ylab("~Lag phase (h)"),
             nrow=1)

4.6 Fig. 4E

#raw and processed data is available in the "Data/NADH_NAD/nadh_nad_measurement.xlsx" file
#results are the compilation of two independent experiments
#in each experiment, the NADH and NAD concentrations of WT, WT DarcA, TetR and TetR DarcA were measured with and without tetracycline (0.75 ug/ml for WT and WT DarcA, and 4 ug/ml for TetR and TetR DarcA)
#each vector below holds the NADH/NAD ratios of one strain/condition in one experiment (vector lengths differ between conditions)
#NADH/NAD ratios in 1st experiment, cultures without tetracycline
wt.no.tetracycline.expt1<-c(0.025735294,0.052434457,0.046875)
wt.arcA.no.tetracycline.expt1<-c(0.071917808,0.081632653,0.070110701)
tetR.no.tetracycline.expt1<-c(0.075117371,0.081545064,0.043715847)
tetR.arcA.no.tetracycline.expt1<-c(0.380645161,0.129943503,0.197740113)
#NADH/NAD ratios in 1st experiment, cultures with tetracycline
wt.with.tetracycline.expt1<-c(0.05524861878,0.02032520325,0.02926829268)
wt.arcA.with.tetracycline.expt1<-c(0.04545454545,0.0351758794,0.05803571429)
tetR.with.tetracycline.expt1<-c(0.1631944444,0.1936507937,0.1032258065)
tetR.arcA.with.tetracycline.expt1<-c(0.1302931596,0.3312883436,0.2218181818)
#NADH/NAD ratios in 2nd experiment, cultures without tetracycline
wt.no.tetracycline.expt2<-c(0.052816901,0.069204152)
#no values are listed for WT DarcA in this experiment/condition; c() evaluates to NULL, so this vector adds nothing when concatenated
wt.arcA.no.tetracycline.expt2<-c()
tetR.no.tetracycline.expt2<-c(0.06185567,0.052631579)
tetR.arcA.no.tetracycline.expt2<-c(0.090425532,0.142857143,0.11409396)
#NADH/NAD ratios in 2nd experiment, cultures with tetracycline
wt.with.tetracycline.expt2<-c(0.074074074,0.050458716)
wt.arcA.with.tetracycline.expt2<-c(0.045801527,0.036363636)
tetR.with.tetracycline.expt2<-c(0.116935484,0.120622568)
tetR.arcA.with.tetracycline.expt2<-c(0.092050209,0.055793991,0.089622642)
#pool the NADH/NAD ratios of the two experiments (1st experiment values first)
#cultures with tetracycline
tetR.arcA.with.tetracycline<-c(tetR.arcA.with.tetracycline.expt1,tetR.arcA.with.tetracycline.expt2)
tetR.with.tetracycline<-c(tetR.with.tetracycline.expt1,tetR.with.tetracycline.expt2)
wt.arcA.with.tetracycline<-c(wt.arcA.with.tetracycline.expt1,wt.arcA.with.tetracycline.expt2)
wt.with.tetracycline<-c(wt.with.tetracycline.expt1,wt.with.tetracycline.expt2)
#cultures without tetracycline
tetR.arcA.no.tetracycline<-c(tetR.arcA.no.tetracycline.expt1,tetR.arcA.no.tetracycline.expt2)
tetR.no.tetracycline<-c(tetR.no.tetracycline.expt1,tetR.no.tetracycline.expt2)
wt.arcA.no.tetracycline<-c(wt.arcA.no.tetracycline.expt1,wt.arcA.no.tetracycline.expt2)
wt.no.tetracycline<-c(wt.no.tetracycline.expt1,wt.no.tetracycline.expt2)
#one box per strain and condition: (-) = without tetracycline, (+) = with tetracycline; both boxes of a strain share its color
fig4e.box.labels<-paste0(rep(c("WT","WT arcA","TetR","TetR arcA"),each=2),rep(c("(-)","(+)"),4))
fig4e.box.colors<-rep(strain.colors[1:4],each=2)
boxplot(wt.no.tetracycline,wt.with.tetracycline,
        wt.arcA.no.tetracycline,wt.arcA.with.tetracycline,
        tetR.no.tetracycline,tetR.with.tetracycline,
        tetR.arcA.no.tetracycline,tetR.arcA.with.tetracycline,
        col=fig4e.box.colors,names=fig4e.box.labels,las=2,
        ylab="NADH/NAD",ylim=c(0,0.4),cex=.7,cex.lab=.7,cex.axis=.7,main="Fig. 4E")

#T-test with respect to wt(-TET)
#t.test() is called with its default arguments throughout, ie, an unpaired, two-sided Welch test (variances not assumed equal)
#the printed p-values are the raw $p.value of each test (no multiple-testing correction is applied here)
#lines starting with [1] are the console output recorded when the analysis was run
print(paste("T-test p-value for WT(-) vs WT arcA(-):",t.test(wt.no.tetracycline,wt.arcA.no.tetracycline)$p.value,sep=""))
[1] "T-test p-value for WT(-) vs WT arcA(-):0.0205071363870608"
print(paste("T-test p-value for WT(-) vs TetR(-):",t.test(wt.no.tetracycline,tetR.no.tetracycline)$p.value,sep=""))
[1] "T-test p-value for WT(-) vs TetR(-):0.207019700535709"
print(paste("T-test p-value for WT(-) vs TetR arcA(-):",t.test(wt.no.tetracycline,tetR.arcA.no.tetracycline)$p.value,sep=""))
[1] "T-test p-value for WT(-) vs TetR arcA(-):0.0329038018590071"
#T-test (-TET) vs (+TET) for each strain
#the (-TET) sample is passed first in every call; the two-sided p-value does not depend on the order of the two samples
print(paste("T-test p-value for WT(+) vs WT(-):",t.test(wt.no.tetracycline,wt.with.tetracycline)$p.value,sep=""))
[1] "T-test p-value for WT(+) vs WT(-):0.773695383582882"
print(paste("T-test p-value for WT arcA(+) vs WT arcA(-):",t.test(wt.arcA.no.tetracycline,wt.arcA.with.tetracycline)$p.value,sep=""))
[1] "T-test p-value for WT arcA(+) vs WT arcA(-):0.00162431938950198"
print(paste("T-test p-value for TetR(+) vs TetR(-):",t.test(tetR.no.tetracycline,tetR.with.tetracycline)$p.value,sep=""))
[1] "T-test p-value for TetR(+) vs TetR(-):0.00738632682251771"
print(paste("T-test p-value for TetR arcA(+) vs TetR arcA(-):",t.test(tetR.arcA.no.tetracycline,tetR.arcA.with.tetracycline)$p.value,sep=""))
[1] "T-test p-value for TetR arcA(+) vs TetR arcA(-):0.719428422469656"

5.1 Fig. 5A

#metabolome similarity between drug treatments (rows) and gene deletions (columns), from Campos & Zampieri 2019 (Molecular cell-Table S3)
similarity.matrix<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/drug_gene_similarity_matrix.csv",row.names=1)
#matching matrix with the p-values of the metabolic profile similarity, from Campos & Zampieri 2019 (Molecular cell-Table S3)
pvalue.matrix<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/drug_gene_similarity_pvalue.csv",row.names=1)
#generate Fig. 5A: similarity to the arcA KO vs -log10 p-value, one point per treatment
plot(x=similarity.matrix[,"arcA"],
     y=-1*log10(pvalue.matrix[rownames(similarity.matrix),"arcA"]),
     xlab="Drug treatment-arcA KO similarity",
     ylab="-log10 p-value",
     main="Fig. 5A")
#highlight the two treatments more similar to the arcA deletion
#x.shift/y.shift move the text label away from its point
fig5a.highlighted.treatments<-list(list(label="Cefpiramide",colour="red",x.shift=-0.02,y.shift=0),
                                   list(label="Sertraline",colour="darkblue",x.shift=0,y.shift=0.3))
for(fig5a.treatment in fig5a.highlighted.treatments)
{
  fig5a.x<-similarity.matrix[grep(fig5a.treatment$label,rownames(similarity.matrix)),"arcA"]
  fig5a.y<- -1*log10(pvalue.matrix[grep(fig5a.treatment$label,rownames(pvalue.matrix)),"arcA"])
  points(x=fig5a.x,y=fig5a.y,col=fig5a.treatment$colour,pch=19)
  text(x=fig5a.x+fig5a.treatment$x.shift,y=fig5a.y+fig5a.treatment$y.shift,fig5a.treatment$label)
}

#check position of arcA KO metabolic profile within ranking  of all gene deletions for the top 2 compounds
#the ranking variable counts the columns whose similarity to the treatment is strictly higher than that of arcA; the printed position is that count + 1
#NOTE(review): the rank is computed across the columns (gene deletions) of the treatment row, but the denominator printed below is nrow(similarity.matrix), ie, the number of rows (treatments) - confirm whether ncol(similarity.matrix) was intended
#lines starting with [1] are the console output recorded when the analysis was run
arcA.sertraline.ranking<-length(which(as.numeric(similarity.matrix["Sertraline",]) > similarity.matrix["Sertraline","arcA"]))
print(paste("Position of arcA deletion in similarity ranking with respect to Sertraline treatment profile was:",arcA.sertraline.ranking+1,"/",nrow(similarity.matrix),sep=""))
[1] "Position of arcA deletion in similarity ranking with respect to Sertraline treatment profile was:17/1279"
arcA.cefpiramide.ranking<-length(which(as.numeric(similarity.matrix["Cefpiramide",]) > similarity.matrix["Cefpiramide","arcA"]))
print(paste("Position of arcA deletion in similarity ranking with respect to Cefpiramide treatment profile was:",arcA.cefpiramide.ranking+1,"/",nrow(similarity.matrix),sep=""))
[1] "Position of arcA deletion in similarity ranking with respect to Cefpiramide treatment profile was:19/1279"

5.2 Fig. S7

#read metabolic profiles of treated E. coli (z-scores) 
#the dataset contains E. coli metabolic profiles (969 metabolites) during treatment with 1,279 compounds
#data sourced from Campos & Zampieri 2019 (Molecular cell-Table S1)
zampieri.metabolic.profiles<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/zscores_metabolic_profile.csv",header=TRUE)
#plot distribution of Z-scores in sertraline treatment
par(mfrow=c(1,2))
hist(zampieri.metabolic.profiles[,"Sertraline"],breaks=100,xlab="Metabolite z-score",main="Sertraline treatment")
#define threshold to consider a metabolite as responsive to sertraline (top 10% absolute value)
sertraline.zscore.threshold<-as.numeric(quantile(abs(zampieri.metabolic.profiles[,"Sertraline"]),0.9,na.rm = TRUE))
#define metabolites whose concentration was altered by sertraline
sertraline.treatment.differential.metabolites<-zampieri.metabolic.profiles[which(abs(zampieri.metabolic.profiles[,"Sertraline"])>= sertraline.zscore.threshold),"Metabolite_ids"]
#remove duplication (ids are handled as plain character strings from here on)
sertraline.treatment.differential.metabolites<-unique(as.character(sertraline.treatment.differential.metabolites))
#remove empty entry
#the entry is dropped by value instead of by a fixed position, so a change in the input data or threshold cannot remove a real metabolite
sertraline.treatment.differential.metabolites<-sertraline.treatment.differential.metabolites[!is.na(sertraline.treatment.differential.metabolites) & nzchar(trimws(sertraline.treatment.differential.metabolites))]
#split entries with multiple KEGG IDs (ids of one entry are separated by "~")
sertraline.treatment.differential.metabolites<-sapply(seq_along(sertraline.treatment.differential.metabolites),function(x){strsplit(sertraline.treatment.differential.metabolites[x],split="~")[[1]]})
#save file with KEGG id of altered metabolites
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/differential_metabolites_sertraline_08302022.csv",array(as.character(unlist(sertraline.treatment.differential.metabolites))))
#repeat analysis for cefpiramide data
#plot distribution of Z-scores in cefpiramide treatment
hist(zampieri.metabolic.profiles[,"Cefpiramide"],breaks=100,xlab="Metabolite z-score",main="Cefpiramide treatment")

#define threshold to consider a metabolite as responsive to cefpiramide (top 10% absolute value)
cefpiramide.zscore.threshold<-as.numeric(quantile(abs(zampieri.metabolic.profiles[,"Cefpiramide"]),0.9,na.rm = TRUE))
#define metabolites whose concentration was altered by cefpiramide
cefpiramide.treatment.differential.metabolites<-zampieri.metabolic.profiles[which(abs(zampieri.metabolic.profiles[,"Cefpiramide"])>= cefpiramide.zscore.threshold),"Metabolite_ids"]
#remove duplication (ids are handled as plain character strings from here on)
cefpiramide.treatment.differential.metabolites<-unique(as.character(cefpiramide.treatment.differential.metabolites))
#remove empty entry (dropped by value, see sertraline above)
cefpiramide.treatment.differential.metabolites<-cefpiramide.treatment.differential.metabolites[!is.na(cefpiramide.treatment.differential.metabolites) & nzchar(trimws(cefpiramide.treatment.differential.metabolites))]
#split entries with multiple KEGG IDs (ids of one entry are separated by "~")
cefpiramide.treatment.differential.metabolites<-sapply(seq_along(cefpiramide.treatment.differential.metabolites),function(x){strsplit(cefpiramide.treatment.differential.metabolites[x],split="~")[[1]]})
#save IDs of altered metabolites
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/differential_metabolites_cefpiramide_related_08302022.csv",array(as.character(unlist(cefpiramide.treatment.differential.metabolites))))
#differential expression analysis of the arcA KO transcriptional data from Park et al. 2013 (PLoS Genetics)
#read normalized GEO data (accession ID: GSE46415)
arcA.ko.park.transcriptomes<-read.csv("../Data/Differential_expression_analysis/GEO_Park2013/arcA_ko_transcriptomics_Park et al_2013.csv",header=TRUE,row.names = 1)
#update row names with locus tag (ie, the part of each row name that precedes "0710")
park.row.name.parts<-strsplit(rownames(arcA.ko.park.transcriptomes),split = "0710")
rownames(arcA.ko.park.transcriptomes)<-vapply(park.row.name.parts,function(parts){parts[1]},character(1))
#log2 transform the values
arcA.ko.park.transcriptomes<-log2(arcA.ko.park.transcriptomes)
#perform differential expression analysis with BayesT as before
arcA.ko.response.park<-bayesT(arcA.ko.park.transcriptomes,numC = 3,numE = 3,conf = 7,doMulttest = TRUE)
#DEGs: BH-adjusted p-value < 0.05 and absolute difference between the two group means (log2 scale) > 1
park.significant<-arcA.ko.response.park$BH < 0.05
park.large.change<-abs(arcA.ko.response.park$meanC - arcA.ko.response.park$meanE)>1
arcA.ko.degs.park<-rownames(arcA.ko.response.park)[which(park.significant & park.large.change)]
#save file with DEGs
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/arcA_KO_DEGs_Park2013.csv",arcA.ko.degs.park)
#identify metabolites whose concentration is altered by the deletion of arcA
#data  sourced from Fuhrer et al. (Molecular Systems Biology 2017)
#data was downloaded from the BioStudies database (accession S-BSST5)
#z-scores of positive and negative ions across gene KOs
positive.ions.zscores<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/positive_ions_zscores_gene_kos.csv",header=TRUE,row.names = 1)
negative.ions.zscores<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/negative_ions_zscores_gene_kos.csv",header=TRUE,row.names = 1)
#tables linking ions ("ion" column) to metabolite KEGG ids ("id" column)
positive.ions.to.metabolites.map<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/pos_kegg_all_3mD.csv")
negative.ions.to.metabolites.map<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/neg_kegg_all_3mD.csv")
#combine all z-scores in a single matrix (positive ions first)
all.ions.zscores<-rbind(as.matrix(positive.ions.zscores),as.matrix(negative.ions.zscores))
#define threshold for top 10% absolute z-scores for arcA KO
arcA.zscore.threshold<-as.numeric(quantile(abs(all.ions.zscores[,"arcA"]),0.9))
#define vectors with arcA KO z-scores
arcA.positive.ions.zscores<-positive.ions.zscores[,"arcA"]
arcA.negative.ions.zscores<-negative.ions.zscores[,"arcA"]
#positions (within each z-score vector) of the ions decreased or increased beyond the threshold
#NOTE(review): which() returns positions, and these are matched against the "ion" column of the maps below; this assumes "ion" holds the row position of each ion in the z-score tables - TODO confirm
arcA.positive.ions.decreased<-which(arcA.positive.ions.zscores <= -arcA.zscore.threshold)
arcA.negative.ions.decreased<-which(arcA.negative.ions.zscores <= -arcA.zscore.threshold)
arcA.positive.ions.increased<-which(arcA.positive.ions.zscores >= arcA.zscore.threshold)
arcA.negative.ions.increased<-which(arcA.negative.ions.zscores >= arcA.zscore.threshold)
#map decreased ions to their KEGG ids and combine (+) and (-) ions
arcA.positive.ions.decreased.kegg<-positive.ions.to.metabolites.map[positive.ions.to.metabolites.map[,"ion"] %in% arcA.positive.ions.decreased,"id"]
arcA.negative.ions.decreased.kegg<-negative.ions.to.metabolites.map[negative.ions.to.metabolites.map[,"ion"] %in% arcA.negative.ions.decreased,"id"]
arcA.decreased.metabolites<-union(arcA.positive.ions.decreased.kegg,arcA.negative.ions.decreased.kegg)
#map increased ions to their KEGG ids and combine (-) and (+) ions
arcA.positive.ions.increased.kegg<-positive.ions.to.metabolites.map[positive.ions.to.metabolites.map[,"ion"] %in% arcA.positive.ions.increased,"id"]
arcA.negative.ions.increased.kegg<-negative.ions.to.metabolites.map[negative.ions.to.metabolites.map[,"ion"] %in% arcA.negative.ions.increased,"id"]
arcA.increased.metabolites<-union(arcA.negative.ions.increased.kegg,arcA.positive.ions.increased.kegg)
#save metabolite ids in a .csv file
#write.csv(file="../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/arcA_ko_differential_metabolites_kegg_083122.csv",union(arcA.increased.metabolites,arcA.decreased.metabolites))
#generate Fig. S7
#read MetaboAnalyst output files
metaboanalyst.output.sertraline<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/MetaboAnalyst_sertraline_pathway_results_08302022.csv")
metaboanalyst.output.cefpiramide<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/MetaboAnalyst_cefpiramide_pathway_results_08302022.csv")
metaboanalyst.output.arcA.ko<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/MetaboAnalyst_arcA_KO_result_pathway_083122.csv")
#choose color palette for heatmap
heatmap.colors<-c("black","grey","white")
#keep pathway name ("X") and FDR of the pathways with FDR <= 0.25 in each MetaboAnalyst table
#list names are the matching column names of the heatmap matrix
perturbed.pathway.tables<-lapply(list("Sertraline-treated"=metaboanalyst.output.sertraline,
                                      "Cefpiramide-treated"=metaboanalyst.output.cefpiramide,
                                      "arcA KO"=metaboanalyst.output.arcA.ko),
                                 function(pathway.table){pathway.table[which(pathway.table[,"FDR"]<= 0.25),c("X","FDR")]})
sertraline.perturbed.pathways<-perturbed.pathway.tables[["Sertraline-treated"]]
cefpiramide.perturbed.pathways<-perturbed.pathway.tables[["Cefpiramide-treated"]]
arcA.ko.perturbed.pathways<-perturbed.pathway.tables[["arcA KO"]]
#create vector with altered metabolic pathways (sertraline first, then cefpiramide, then arcA KO)
perturbed.pathways.combined<-unique(unlist(lapply(perturbed.pathway.tables,function(pathway.table){as.character(pathway.table[,"X"])}),use.names=FALSE))
#create matrix with pathways identified as perturbed by MetaboAnalyst
#every cell starts at 1 and is overwritten with the FDR wherever the pathway passed the cutoff for that condition
perturbed.pathways.matrix<-matrix(1,nrow=length(perturbed.pathways.combined),ncol=3,
                                  dimnames=list(perturbed.pathways.combined,c("arcA KO","Sertraline-treated","Cefpiramide-treated")))
#fill out the matrix, one condition (column) at a time
for(condition.name in names(perturbed.pathway.tables))
{
  condition.table<-perturbed.pathway.tables[[condition.name]]
  #row of the condition table holding each combined pathway (NA when the pathway is absent from that table)
  table.row<-match(perturbed.pathways.combined,as.character(condition.table[,"X"]))
  pathways.found<-which(!is.na(table.row))
  perturbed.pathways.matrix[pathways.found,condition.name]<-condition.table[table.row[pathways.found],"FDR"]
}
#plot heatmap
pheatmap(t(perturbed.pathways.matrix),color =heatmap.colors,scale = "none",cluster_rows=FALSE,cluster_cols = FALSE,fontsize = 8,angle_col = 90, breaks = c(0,0.1,0.25,1),main="Fig. S7")

5.3 Fig. 5B

#assess susceptibility of the WT, WT DarcA, TetR and TetR DarcA strains to sertraline 
#read OD readings from Bioscreen experiments to generate corresponding growth curves
#first, read growth data for WT and WT DarcA treated with a range of sertraline concentrations 
#for each strain/sertraline concentration, there were three biological replicates (each one with three replicates)
#bioscreen experiment was performed for a total of ~25h
#WT strain was cultured in plate # 1 (ie, wells #1-100)
#WT DarcA strain was cultured in plate # 2 (ie, wells #101-200)
growth.data.WT.WTarcA.sertraline<-read.csv("../Data/Sertraline_dilution/WT_and_WT arcA/2022_0222_formatted_data.csv",row.names=1)
#normalize data by subtracting minimum OD reading per well 
#normalized matrix includes the first ~16h of growth (first 33 readings); columns keep the well names
growth.data.WT.WTarcA.sertraline.16h.normalized<-sapply(growth.data.WT.WTarcA.sertraline[1:33,,drop=FALSE],
                                                        function(well.od){well.od-min(well.od)})
colnames(growth.data.WT.WTarcA.sertraline.16h.normalized)<-colnames(growth.data.WT.WTarcA.sertraline)
#define maximum OD among blank wells (wells # 92-100 were included as a control for WT, and wells # 192-200 as a control for WT arcA) 
#1st OD reading was excluded in this analysis because it commonly gives a higher than expected reading (ie, higher than the reading taken a few minutes later)
wt.max.blank<-max(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,92:100])
wt.arcA.max.blank<-max(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,192:200])
#define wells where growth was observed as those where max OD > 2x maximum growth detected in blank wells (inoculated with sterile LB)
wt.max.od.per.well<-apply(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,2:90],MARGIN = 2,FUN=max)
wt.wells.that.grew<-names(wt.max.od.per.well)[which(wt.max.od.per.well>2*wt.max.blank)]
wt.arcA.max.od.per.well<-apply(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,102:190],MARGIN = 2,FUN=max)
wt.arcA.wells.that.grew<-names(wt.arcA.max.od.per.well)[which(wt.arcA.max.od.per.well>2*wt.arcA.max.blank)]
#estimate WT MIC for sertraline
#MIC = lowest tested concentration at which fewer than 2 wells grew; defaults to the highest concentration that was tested (80)
wt.sertraline.tested.doses<-c(0,seq(30,60,by=5),80) #values in ug/ml
wt.sertraline.concentrations<-paste0("WT_",wt.sertraline.tested.doses)
wt.doses.without.growth<-vapply(wt.sertraline.concentrations,
                                function(well.label){length(grep(well.label,wt.wells.that.grew))<2},logical(1))
wt.sertraline.mic<-min(c(80,wt.sertraline.tested.doses[wt.doses.without.growth]))
#estimate WT DarcA MIC for sertraline (same rule and default value)
wt.arcA.sertraline.tested.doses<-c(0,seq(30,60,by=5),80) #values in ug/ml
wt.arcA.sertraline.concentrations<-paste0("WTarcA_",wt.arcA.sertraline.tested.doses)
wt.arcA.doses.without.growth<-vapply(wt.arcA.sertraline.concentrations,
                                     function(well.label){length(grep(well.label,wt.arcA.wells.that.grew))<2},logical(1))
wt.arcA.sertraline.mic<-min(c(80,wt.arcA.sertraline.tested.doses[wt.arcA.doses.without.growth]))
#report the sertraline MICs estimated for WT and WT DarcA
#lines starting with [1] are the console output recorded when the analysis was run
print(paste("WT Sertraline MIC:",wt.sertraline.mic,"ug/ml",sep=""))
[1] "WT Sertraline MIC:45ug/ml"
print(paste("WT DarcA Sertraline MIC:",wt.arcA.sertraline.mic,"ug/ml",sep=""))
[1] "WT DarcA Sertraline MIC:50ug/ml"
#second, read growth data for TetR and TetR DarcA treated with different sertraline concentrations 
#experiment was performed for a total of ~24h
#TetR strain was cultured in plate # 1 (ie, wells #1-100)
#TetR DarcA strain was cultured in plate # 2 (ie, wells #101-200)
growth.data.TetR.TetRarcA.sertraline<-read.csv("../Data/Sertraline_dilution/TetR_and_TetR arcA/2022_0221_formatted_data.csv",row.names=1)
#normalize data by subtracting minimum OD reading per well
#normalized matrix includes the first ~16h of growth (first 33 readings); columns keep the well names
growth.data.TetR.TetRarcA.sertraline.16h.normalized<-sapply(growth.data.TetR.TetRarcA.sertraline[1:33,,drop=FALSE],
                                                            function(well.od){well.od-min(well.od)})
colnames(growth.data.TetR.TetRarcA.sertraline.16h.normalized)<-colnames(growth.data.TetR.TetRarcA.sertraline)
#define maximum OD among blank wells (wells # 92-100 were included as a control for TetR, and wells # 192-200 as a control for TetR arcA) 
#as for the other plates, the 1st OD reading is left out
tetR.max.blank<-max(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,92:100])
tetR.arcA.max.blank<-max(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,192:200])
#define wells where growth was observed as those where max OD > 2x maximum growth detected in blank wells (inoculated with sterile LB)
tetR.max.od.per.well<-apply(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,2:90],MARGIN = 2,FUN=max)
tetR.wells.that.grew<-names(tetR.max.od.per.well)[which(tetR.max.od.per.well>2*tetR.max.blank)]
tetR.arcA.max.od.per.well<-apply(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,102:190],MARGIN = 2,FUN=max)
tetR.arcA.wells.that.grew<-names(tetR.arcA.max.od.per.well)[which(tetR.arcA.max.od.per.well>2*tetR.arcA.max.blank)]
#estimate TetR MIC for sertraline
#MIC = lowest tested concentration at which fewer than 2 wells grew; defaults to the highest concentration that was tested (60)
tetR.sertraline.tested.doses<-c(0,seq(20,50,by=5),60) #values in ug/ml
tetR.sertraline.concentrations<-paste0("TetR_",tetR.sertraline.tested.doses)
tetR.doses.without.growth<-vapply(tetR.sertraline.concentrations,
                                  function(well.label){length(grep(well.label,tetR.wells.that.grew))<2},logical(1))
tetR.sertraline.mic<-min(c(60,tetR.sertraline.tested.doses[tetR.doses.without.growth]))
#define TetR DarcA MIC for sertraline (same rule; default value is 30)
tetR.arcA.sertraline.tested.doses<-c(0,5,seq(10,20,by=2.5),25,30) #values in ug/ml
tetR.arcA.sertraline.concentrations<-paste0("TetRarcA_",tetR.arcA.sertraline.tested.doses)
tetR.arcA.doses.without.growth<-vapply(tetR.arcA.sertraline.concentrations,
                                       function(well.label){length(grep(well.label,tetR.arcA.wells.that.grew))<2},logical(1))
tetR.arcA.sertraline.mic<-min(c(30,tetR.arcA.sertraline.tested.doses[tetR.arcA.doses.without.growth]))
#report the sertraline MICs estimated for TetR and TetR DarcA
#lines starting with [1] are the console output recorded when the analysis was run
print(paste("TetR Sertraline MIC:",tetR.sertraline.mic,"ug/ml",sep=""))
[1] "TetR Sertraline MIC:35ug/ml"
print(paste("TetR DarcA Sertraline MIC:",tetR.arcA.sertraline.mic,"ug/ml",sep=""))
[1] "TetR DarcA Sertraline MIC:17.5ug/ml"
#create Fig. 5B
#create vector with all sertraline concentrations (ug/ml) tested among the four strains
sertraline.concentrations<-c(0,5,10,12.5,15,17.5,20,25,30,35,40,45,50,55,60,80)
#define growth pattern (1= growth, 0= no growth, -1: not tested) of each strain in the full range of sertraline concentrations 
#each vector has one entry per element of sertraline.concentrations, in the same order
tetR.dose.response<-c(1,rep(-1,5),rep(1,3),rep(0,4),-1, 0,-1)
tetR.arcA.dose.response<-c(rep(1,5),rep(0,4), rep(-1,7))
wt.dose.response<-c(1, rep(-1,7),rep(1,3),rep(0,5))
wt.arcA.dose.response<- c(1, rep(-1,7),rep(1,4),rep(0,4))
#create dose response matrix (rows = concentrations, columns = strains)
sertraline.dose.response.matrix<-cbind(wt.dose.response, wt.arcA.dose.response, 
                             tetR.dose.response,tetR.arcA.dose.response)
rownames(sertraline.dose.response.matrix)<-sertraline.concentrations
colnames(sertraline.dose.response.matrix)<-c("WT","WT arcA", "TetR","TetR arcA")
#three color bins centered on the codes -1 (grey), 0 and 1
heatmap.breaks<-c(-1,-0.33,0.33,1)
#rows are reversed so that the highest concentration is drawn at the top
#the row order follows the matrix size instead of a fixed 16:1, and the dangling empty 'annotation_row = ' argument was dropped (pheatmap's default is used either way)
pheatmap(sertraline.dose.response.matrix[nrow(sertraline.dose.response.matrix):1,],scale="none",color= c("grey","#BEAED4","#7FC97F"),
         cluster_rows=FALSE,cluster_cols = FALSE,fontsize = 11,angle_col = 45, breaks = heatmap.breaks,
         legend_breaks=heatmap.breaks,legend=TRUE,main="Fig. 5B")

5.4 Compute FIC2 scores of WT for the sertraline-tetracycline combination

#analyze results of DiaMOND assay for the sertraline-tetracycline combination
#function to create a monotonically decreasing dose response curve (as described in the original DiaMOND manuscript, Cokol et al. Science Advances 2017)
#od.vector: vector of readings, processed in the order given
#returns a vector of the same length in which every reading higher than the last accepted value is replaced by that value
#vectors with fewer than two readings are returned unchanged
create.monotonically.decreasing.vector<-function(od.vector)
{
  number.of.points<-length(od.vector)
  #nothing to compare with fewer than two readings (2:length(od.vector) would count backwards in that case)
  if(number.of.points<2)
  {
    return(od.vector)
  }
  #position of the reading that supplies each output value
  source.position<-integer(number.of.points)
  source.position[1]<-1L
  for(i in 2:number.of.points)
  {
    last.accepted<-source.position[i-1]
    #the last accepted value never exceeds the preceding raw reading, so a single comparison is sufficient
    if(od.vector[i]<=od.vector[last.accepted])
    {
      source.position[i]<-i
    } else {
      source.position[i]<-last.accepted
    }
  }
  od.vector[source.position]
}
#function to express every value of an OD vector as a fraction of its first element (the untreated control)
normalize.vector<-function(od.vector)
{
  untreated.control.od<-od.vector[1]
  od.vector/untreated.control.od
}
#function to interpolate IC50 values
#normalized.od.vector: dose response vector (ie, the normalized od vector)
#abx.concentration.range: set of antibiotic concentrations associated with the dose response vector
#n.interpolation.points: number of points at which the fitted spline is evaluated (the default keeps the original resolution)
#returns the concentration(s) whose interpolated response is closest to 0.5, or NA when the vector does not contain values both above and below 0.5
interpolate.ic50<-function(normalized.od.vector,abx.concentration.range,n.interpolation.points=10000000)
{
  #default result, so that the function always returns a defined value
  estimated.ic50<-NA_real_
  #confirm that the normalized vector contains values above and below 0.5 (ie, the IC50 point)
  positions.below.ic50<-which(normalized.od.vector < 0.5)
  positions.above.ic50<-which(normalized.od.vector > 0.5)
  if(length(positions.below.ic50)>0 && length(positions.above.ic50)>0)
  {
    #spline fitting
    temporal.spline.fitting<-spline(abx.concentration.range, normalized.od.vector, n=n.interpolation.points)
    #estimate IC50 as the evaluated concentration whose fitted response is closest to 0.5
    distance.to.ic50<-abs(temporal.spline.fitting$y-0.5)
    estimated.ic50<-temporal.spline.fitting$x[which(distance.to.ic50==min(distance.to.ic50))]
  }
  estimated.ic50
}
#analyze DiaMOND results for WT and WT DarcA strains
#data is in a matrix with 384 columns (each column is a well in the plate)
wt.wt.arcA.biotek.data<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/WT_and_WT arcA/2022_0226_formatted_data.CSV",row.names=1)
#layout of the 384 well plate:
#rows # 1-5 were used for sertraline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 6-10 were used for tetracycline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 11-15 were used for sertraline-tetracycline combination linear dilutions (ie, 0x-2x by 0.2 increments)
#columns #1 and #24 were blank (LB only) 
#columns #2-12 were inoculated with WT and columns # 13-23 were inoculated with WT arcA KO 
#remove column with temperature data
wt.wt.arcA.biotek.data<-wt.wt.arcA.biotek.data[,-1]
#define difference in OD (delta OD) for each well after 16h of growth (max OD - min OD over the first 33 readings)
#the 16h growth window was defined based on the DiaMOND protocol (Cokol-Cakmak et al. JOVE 2018)
wt.wt.arcA.biotek.delta.od<-sapply(wt.wt.arcA.biotek.data[1:33,,drop=FALSE],
                                   function(well.od){max(well.od)-min(well.od)})
names(wt.wt.arcA.biotek.delta.od)<-colnames(wt.wt.arcA.biotek.data)
#extract delta OD values for WT strain
#one row will correspond to a linear dilution (from 0x to 2x by 0.2x increments), ie, plate columns 2-12 of plate rows A-O
wt.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row){
  wt.wt.arcA.biotek.delta.od[paste(plate.row,2:12,sep="")]
}))
#check delta OD profile and interpolate IC50s
par(mfrow=c(3,3))
concentration.linear.range<-seq(0,2,by=0.2)
wt.ic50.vector<-c()
for(r in 1:nrow(wt.delta.od.matrix))
{
  #generate monotonically decreasing delta OD vector and normalize it to the untreated well
  current.od.vector<-create.monotonically.decreasing.vector(wt.delta.od.matrix[r,])
  normalized.od.vector<-normalize.vector(current.od.vector)
  #x axis label depends on the treatment of the row (1-5 sertraline, 6-10 tetracycline, 11-15 combination)
  if(r<6)
  {
    treatment.axis.label<-"[Sertraline(x IC50)]"
  } else if(r<11) {
    treatment.axis.label<-"[Tetracycline(x IC50)]"
  } else {
    treatment.axis.label<-"[Tetracycline-Sertraline(x IC50)]"
  }
  #plot delta OD data (ie, effect of treatment on growth)
  plot(x=concentration.linear.range,y=normalized.od.vector,type="o",
       xlab=treatment.axis.label,ylab='Normalized OD')
  abline(h=0.5,col="grey",lty=2)
  #define IC50 using the approxfun function (concentration as a function of normalized OD, evaluated at 0.5)
  ic50.interpolated.value<-approxfun(x=normalized.od.vector,y=concentration.linear.range)(0.5)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50
  wt.ic50.vector<-c(wt.ic50.vector,round(ic50.interpolated.value,digits=3))
}

collapsing to unique 'x' values

#define WT FIC2 for each biological replicate
#positions 1-5 of the IC50 vector are the sertraline rows, 6-10 the tetracycline rows and 11-15 the combination rows
wt.sertraline.ic50<-wt.ic50.vector[1:5]
wt.tetracycline.ic50<-wt.ic50.vector[6:10]
wt.combination.ic50<-wt.ic50.vector[11:15]
#combination IC50 divided by 2ab/(a+b) of the two single-drug IC50s
wt.fic<-wt.combination.ic50/(2*(wt.sertraline.ic50*wt.tetracycline.ic50)/(wt.sertraline.ic50+wt.tetracycline.ic50))
print(paste("WT FIC2:",round(geoMean(wt.fic),digits = 3),sep=""))
[1] "WT FIC2:1.269"

5.5 Compute FIC2 scores of WT arcA for the sertraline-tetracycline combination

#extract delta OD values for the WT arcA strain
#one row per linear dilution, ie, plate columns 13-23 of plate rows A-O
wt.arcA.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row){
  wt.wt.arcA.biotek.delta.od[paste(plate.row,13:23,sep="")]
}))
#check delta OD profile and interpolate IC50s
par(mfrow=c(3,3))
wt.arcA.ic50.vector<-c()
for(r in 1:nrow(wt.arcA.delta.od.matrix))
{
  #generate monotonically decreasing delta OD vector and normalize it to the untreated well
  current.od.vector<-create.monotonically.decreasing.vector(wt.arcA.delta.od.matrix[r,])
  normalized.od.vector<-normalize.vector(current.od.vector)
  #x axis label depends on the treatment of the row (1-5 sertraline, 6-10 tetracycline, 11-15 combination)
  if(r<6)
  {
    treatment.axis.label<-"[Sertraline(x IC50)]"
  } else if(r<11) {
    treatment.axis.label<-"[Tetracycline(x IC50)]"
  } else {
    treatment.axis.label<-"[Sertraline-Tetracycline(x IC50)]"
  }
  #plot delta OD data
  plot(x=concentration.linear.range,y=normalized.od.vector,type="o",
       xlab=treatment.axis.label,ylab='Normalized OD')
  abline(h=0.5,col="red",lty=2)
  #define IC50 using the approxfun function (concentration as a function of normalized OD, evaluated at 0.5)
  ic50.interpolated.value<-approxfun(x=normalized.od.vector,y=concentration.linear.range)(0.5)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50
  wt.arcA.ic50.vector<-c(wt.arcA.ic50.vector,round(ic50.interpolated.value,digits=3))
}

collapsing to unique 'x' values

#compute FIC2 of WT arcA replicates
#positions 1-5 of the IC50 vector are the sertraline rows, 6-10 the tetracycline rows and 11-15 the combination rows
wt.arcA.sertraline.ic50<-wt.arcA.ic50.vector[1:5]
wt.arcA.tetracycline.ic50<-wt.arcA.ic50.vector[6:10]
wt.arcA.combination.ic50<-wt.arcA.ic50.vector[11:15]
#combination IC50 divided by 2ab/(a+b) of the two single-drug IC50s
wt.arcA.fic<-wt.arcA.combination.ic50/(2*(wt.arcA.sertraline.ic50*wt.arcA.tetracycline.ic50)/(wt.arcA.sertraline.ic50+wt.arcA.tetracycline.ic50))
print(paste("WT arcA FIC2:",round(geoMean(wt.arcA.fic),digits=3),sep=""))
[1] "WT arcA FIC2:1.228"

5.6 Compute FIC2 of TetR arcA for the sertraline-tetracycline combination

#analyze DiaMOND results of the TetR and TetR DarcA strains
#the exported data has 384 well columns (each column is a well in a plate)
tetR.tetR.arcA.biotek.data<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/TetR_and_TetR arcA/run1/2022_0301_formatted_data.CSV",row.names=1)
#layout of the 384 well plate:
#  rows # 1-5: sertraline linear dilutions (ie, 0x-2x by 0.2 increments)
#  rows # 6-10: tetracycline linear dilutions (ie, 0x-2x by 0.2 increments)
#  rows # 11-15: sertraline-tetracycline combination linear dilutions (ie, 0x-2x by 0.2 increments)
#  columns #1 and #24: blank (LB only)
#  columns #2-12: TetR; columns # 13-23: TetR arcA KO
#drop the column with temperature data
tetR.tetR.arcA.biotek.data<-tetR.tetR.arcA.biotek.data[,-1]
#delta OD of a well = max OD - min OD over reads 1-33 (after 16h of growth)
#the 16h growth window was defined based on the DiaMOND protocol (Cokol-Cakmak et al. JOVE 2018)
tetR.tetR.arcA.delta.od<-vapply(1:ncol(tetR.tetR.arcA.biotek.data),function(well.index)
{
  growth.window<-tetR.tetR.arcA.biotek.data[1:33,well.index]
  max(growth.window)-min(growth.window)
},numeric(1))
names(tetR.tetR.arcA.delta.od)<-colnames(tetR.tetR.arcA.biotek.data)
#delta OD matrix of the TetR arcA strain (plate columns 13-23)
#one row per plate row A-O, ie, one linear dilution (from 0x to 2x by 0.2x increments)
tetR.arcA.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row)
{
  tetR.tetR.arcA.delta.od[paste(plate.row,13:23,sep="")]
}))
#check the delta OD profile of every TetR arcA dilution row and interpolate its IC50
#panels are drawn on a 3x3 grid
par(mfrow=c(3,3))
tetR.arcA.ic50.vector<-c()
#x-axis label per group of rows: rows 1-5, rows 6-10 and rows 11 onwards
treatment.axis.labels<-c("[Sertraline(x IC50)]","[Tetracycline(x IC50)]","[Sertraline-Tetracycline(x IC50)]")
for(r in 1:nrow(tetR.arcA.delta.od.matrix))
{
  #monotonically decreasing version of the delta OD row, passed through normalize.vector
  current.od.vector<-create.monotonically.decreasing.vector(tetR.arcA.delta.od.matrix[r,])
  normalized.od.vector<-normalize.vector(current.od.vector)
  #dose-response panel with a dashed reference line at 0.5
  plot(x=concentration.linear.range,y=normalized.od.vector,type="o",
       xlab=treatment.axis.labels[findInterval(r,c(6,11))+1],ylab='Normalized OD')
  abline(h=0.5,col="red",lty=2)
  #IC50: concentration at which the interpolated (approxfun) normalized OD equals 0.5
  temporal.approx.func<-approxfun(x=normalized.od.vector,y=concentration.linear.range)
  ic50.interpolated.value<-temporal.approx.func(0.5)
  #mark the estimate on the panel and store it rounded to 3 digits
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  tetR.arcA.ic50.vector<-c(tetR.arcA.ic50.vector,round(ic50.interpolated.value,digits=3))
}

#define FIC2 for TetR arcA replicates
#FIC2 of replicate i = IC50[i+10] / harmonic mean of IC50[i] and IC50[i+5]
#(entries 1-5, 6-10 and 11-15 of the IC50 vector come from the sertraline,
#tetracycline and sertraline-tetracycline rows, respectively)
tetR.arcA.fic<-local({
  sertraline.ic50<-tetR.arcA.ic50.vector[1:5]
  tetracycline.ic50<-tetR.arcA.ic50.vector[6:10]
  combination.ic50<-tetR.arcA.ic50.vector[11:15]
  combination.ic50/(2*(sertraline.ic50*tetracycline.ic50)/(sertraline.ic50+tetracycline.ic50))
})
#report the geometric mean across the five replicates
print(paste0("TetR arcA FIC2:",round(geoMean(tetR.arcA.fic),digits=3)))
[1] "TetR arcA FIC2:1.2"

5.7 Compute FIC2 of TetR

#delta OD matrix of the TetR strain (plate columns 2-12), one row per plate row A-O
#NOTE:replicates with more than one potential IC50 (on the raw data) or increased growth after potential IC50 were not used for FIC2 calculation
tetR.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row)
{
  tetR.tetR.arcA.delta.od[paste(plate.row,2:12,sep="")]
}))
#check delta OD profile and interpolate IC50s (3x3 grid of panels)
par(mfrow=c(3,3))
tetR.ic50.vector<-c()
#x-axis label per group of rows: rows 1-5, rows 6-10 and rows 11 onwards
treatment.axis.labels<-c("[Sertraline(x IC50)]","[Tetracycline(x IC50)]","[Sertraline-Tetracycline(x IC50)]")
for(r in 1:nrow(tetR.delta.od.matrix))
{
  #monotonically decreasing version of the delta OD row, passed through normalize.vector
  current.od.vector<-create.monotonically.decreasing.vector(tetR.delta.od.matrix[r,])
  normalized.od.vector<-normalize.vector(current.od.vector)
  #raw dose-response curve first (black line), with a dashed reference line at 0.5
  plot(x=concentration.linear.range,y=normalize.vector(tetR.delta.od.matrix[r,]),type="l",
       xlab=treatment.axis.labels[findInterval(r,c(6,11))+1],ylab='Normalized OD',main="Expt1")
  abline(h=0.5,col="red",lty=2)
  #monotonically decreasing dose response curve on top (orange line)
  points(x=concentration.linear.range,y=normalized.od.vector,type="o",col="orange")
  #IC50 from the in-house spline fitting function (more accurate than approxfun)
  ic50.interpolated.value<-interpolate.ic50(normalized.od.vector,concentration.linear.range)
  #mark the estimate on the panel and store it rounded to 3 digits
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  tetR.ic50.vector<-c(tetR.ic50.vector,round(ic50.interpolated.value,digits=3))
}

#replicates # 1, #3 and #4 not used for FIC2 estimation after visual inspection of the sertraline dose response curves
#a second experiment (below) was performed to increase the number of biological replicates
#FIC2 of TetR replicates in experiment # 1:
#FIC2 of replicate i = IC50[i+10] / harmonic mean of IC50[i] and IC50[i+5]
tetR.fic.expt1<-local({
  sertraline.ic50<-tetR.ic50.vector[1:5]
  tetracycline.ic50<-tetR.ic50.vector[6:10]
  combination.ic50<-tetR.ic50.vector[11:15]
  combination.ic50/(2*(sertraline.ic50*tetracycline.ic50)/(sertraline.ic50+tetracycline.ic50))
})
#keep only replicates # 2 and # 5
tetR.fic.expt1<-tetR.fic.expt1[c(2,5)]
#read data collected on a second DiaMOND experiment for TetR (five biological replicates)
tetR.biotek.data.expt2<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/TetR_and_TetR arcA/run2/2022_0413_formatted_data.CSV",row.names=1)
#layout of the 384 well plate:
#  rows # 1-5: sertraline linear dilutions, plated as 0x,.2x,.4x,.6x,.8x,1.0x,1.2x,1.6x,1.4x,1.8x,2.0x
#  rows # 6-10: tetracycline linear dilutions (ie, 0x-2x by 0.2 increments)
#  rows # 11-15: sertraline-tetracycline combination linear dilutions (ie, 0x-2x by 0.2 increments)
#  columns #1, #13-15: blank (LB only)
#  columns #2-12: TetR
#drop the column with temperature data
tetR.biotek.data.expt2<-tetR.biotek.data.expt2[,-1]
#delta OD of a well = max OD - min OD over reads 1-33
tetR.biotek.delta.od.expt2<-vapply(1:ncol(tetR.biotek.data.expt2),function(well.index)
{
  growth.window<-tetR.biotek.data.expt2[1:33,well.index]
  max(growth.window)-min(growth.window)
},numeric(1))
names(tetR.biotek.delta.od.expt2)<-colnames(tetR.biotek.data.expt2)
#tetR delta OD matrix (expt #2): plate columns 2-12, one row per plate row A-O
tetR.delta.od.matrix.expt2<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row)
{
  tetR.biotek.delta.od.expt2[paste(plate.row,2:12,sep="")]
}))
#check delta OD profile and interpolate IC50s
#one panel per row of the expt #2 delta OD matrix, drawn on a 3x3 grid;
#the rounded IC50 of every row is appended to tetR.ic50.vector.expt2
par(mfrow=c(3,3))

tetR.ic50.vector.expt2<-c()
for(r in 1:nrow(tetR.delta.od.matrix.expt2))
{
  #generate monotonically decreasing delta OD vector
  current.od.vector<-create.monotonically.decreasing.vector(tetR.delta.od.matrix.expt2[r,])
  #rows 1-5: sertraline panels (raw curve)
  if(r<6)
  {
    #NOTE(review): the same index permutation c(1:7,9,8,10:11) is applied to both x and y,
    #so every OD value stays paired with the same concentration as without reordering
    #(only the drawing order of the line changes). If the 1.6x and 1.4x wells are swapped
    #on the plate, as the layout comment above the data import says, only one of the two
    #vectors should be permuted, and the IC50 input below (current.od.vector, built from
    #the unpermuted row) would need the same reordering - TODO confirm the intended mapping
    plot(x=concentration.linear.range[c(1:7,9,8,10:11)],y=normalize.vector(tetR.delta.od.matrix.expt2[r,c(1:7,9,8,10:11)]),type="l",
         xlab="[Sertraline(x IC50)]",ylab='Normalized OD',main="Expt2")
    abline(h=0.5,col="red",lty=2)
  }
  #rows 6-10: tetracycline panels (raw curve)
  if(r>5 & r<11)
  {
    plot(x=concentration.linear.range,y=normalize.vector(tetR.delta.od.matrix.expt2[r,]),type="l",
         xlab="[Tetracycline(x IC50)]",ylab='Normalized OD',main="Expt2")
    abline(h=0.5,col="red",lty=2)
  }
  #rows 11 onwards: sertraline-tetracycline combination panels (raw curve, drawn with type="o")
  if(r>10)
  {
    plot(x=concentration.linear.range,y=normalize.vector(tetR.delta.od.matrix.expt2[r,]),type="o",
         xlab="[Sertraline-Tetracycline(x IC50)]",ylab='Normalized OD',main="Expt2")
    abline(h=0.5,col="red",lty=2)
  }
  #also plot monotonically decreasing version of the dose response
  points(x=concentration.linear.range,y=normalize.vector(current.od.vector),type="o",col="orange")
  #define IC50 using the in-house spline fitting function
  ic50.interpolated.value<-interpolate.ic50(normalize.vector(current.od.vector),concentration.linear.range)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50
  tetR.ic50.vector.expt2<-c(tetR.ic50.vector.expt2,round(ic50.interpolated.value,digits=3))
}

#define FIC2 of TetR replicates in expt # 2
#FIC2 of replicate i = IC50[i+10] / harmonic mean of IC50[i] and IC50[i+5]
#(entries 1-5, 6-10 and 11-15 of the IC50 vector come from the sertraline,
#tetracycline and sertraline-tetracycline rows, respectively)
tetR.fic.expt2<-local({
  sertraline.ic50<-tetR.ic50.vector.expt2[1:5]
  tetracycline.ic50<-tetR.ic50.vector.expt2[6:10]
  combination.ic50<-tetR.ic50.vector.expt2[11:15]
  combination.ic50/(2*(sertraline.ic50*tetracycline.ic50)/(sertraline.ic50+tetracycline.ic50))
})
#remove 5th replicate
tetR.fic.expt2<-tetR.fic.expt2[1:4]
#combine TetR FIC2 from both experiments
#c() is used instead of union(): union() discards duplicated values, so two replicates
#with the same FIC2 would be counted once and bias the geometric mean
tetR.fic<-c(tetR.fic.expt1,tetR.fic.expt2)
print(paste0("TetR FIC2:",round(geoMean(tetR.fic),digits=3)))
[1] "TetR FIC2:0.669"

5.8 Fig. 5C

#Fig. 5C: FIC2 of the four strains, with a dashed reference line at FIC2 = 1
boxplot(
  wt.fic,wt.arcA.fic,tetR.fic,tetR.arcA.fic,
  names=c("WT","WT arcA", "TetR","TetR arcA"),
  col=strain.colors[1:4],
  ylab="FIC2",ylim=c(0.5,1.3),
  las=2,cex.axis=1.1,cex.lab=1.1,
  main="Fig. 5C"
)
abline(h=1,lty=2,col="darkgrey")

---
title: "Computational analyses for *Disrupting the ArcA regulatory network amplifies the fitness cost of tetracycline resistance in Escherichia coli* manuscript"
output: html_notebook
author: Mario Arrieta-Ortiz et al.
date: September 7, 2022
---
0. Load required libraries
```{r message=FALSE}
library(ALDEx2)
library(corpcor)
library(EnvStats)
library(gdata)
library(ggplot2)
library(gplots)
library(gridExtra)
library(growthcurver)
library(igraph)
library(multtest)
library(network)
library(plyr)
library(pheatmap)
library(RColorBrewer)
library(Rtsne)
library(viridis)
```
1.1 Load genome annotation related to efflux pumps, metabolism and transcriptional regulation (compiled from multiple sources)
```{r}
#load the transcriptional regulatory network compiled from RegulonDB (http://regulondb.ccg.unam.mx)
#rows are target genes and columns are regulators
#activation and repression are indicated with '1' and '-1', respectively
load("../Data/Input/Ecoli_TRN.RData")
#transcription factor (TF) identifiers: the second "_"-separated field of every
#column name of the network is kept (the locus tag)
tf.names<-vapply(strsplit(colnames(transcriptional.network),split="_"),function(name.fields){name.fields[2]},character(1))
#metabolic genes of the E. coli metabolic model iJO1366 (Orth et al. 2011, MSB)
metabolic.genes<-read.csv("../Data/Input/genes_metabolic_model_iJO1366.csv")$Model.genes
#efflux pump related genes (compiled from EcoCyc database and literature review)
effluxPump.genes<-read.csv("../Data/Input/efflux_pump_genes.csv",header=TRUE)$Locus
#TFs and efflux pump related genes do not count as metabolic genes
metabolic.genes<-setdiff(setdiff(metabolic.genes,tf.names),effluxPump.genes)
#row identifiers of the network; the lookup table used to convert a locus tag
#into a standard gene name (e.g., b0001 to thrL)
ecoli.trn.genes<-rownames(transcriptional.network)
#convert locus tags to standard gene names (e.g., b0001 to thrL)
#arguments:
#  locus.list: vector of locus tags
#returns:
#  character vector with one entry per locus tag: the text before the first "_" of the
#  first entry of the global ecoli.trn.genes that contains the tag. Tags without any
#  matching entry give NA; an empty locus.list gives an empty character vector
convert.locus.to.gene.name<-function(locus.list)
{
  #seq_along() keeps empty input empty (1:length() would iterate over c(1,0))
  vapply(seq_along(locus.list),function(x)
  {
    #fixed=TRUE: the locus tag is matched literally, not as a regular expression
    matching.entries<-ecoli.trn.genes[grep(locus.list[x],ecoli.trn.genes,fixed=TRUE)]
    if(length(matching.entries)==0)
    {
      return(NA_character_)
    }
    strsplit(matching.entries,split="_")[[1]][1]
  },character(1))
}
```
1.2 Run differential expression analysis of transcriptional data reported by Handel et al. (2014)
```{r}
#differential expression analysis uses a Bayesian t-test from the Cyber-T algorithm (Baldi and Long, 2001)
#code downloaded from http://cybert.ics.uci.edu
source("Bayesian_Ttest/cyberTtest.R")
#normalized GEO data (accession ID: GSE57084); the first column provides the row names
handel.normalized.matrix<-read.csv(file="../Data/Differential_expression_analysis/GEO_Handel2014/Handel_normalized_GEO_data.csv",header=TRUE,row.names=1)
#map between microarray probe IDs and loci, stored as a matrix
probes.to.loci.map<-as.matrix(read.csv(file="../Data/Differential_expression_analysis/GEO_Handel2014/probes_loci_array_map.csv",header=TRUE,row.names=1))
#relabel the rows of the expression matrix: probe name -> "ORF" entry of the map (locus tag)
rownames(handel.normalized.matrix)<-probes.to.loci.map[rownames(handel.normalized.matrix),"ORF"]
#function to count the number of TFs, efflux pump genes and metabolic genes among significantly up- and down-regulated genes
#arguments:
#  geneSet: vector of locus tags
#  expression.change: "UP" or "DOWN"
#returns:
#  a 1 x 4 matrix with columns <prefix>.others, <prefix>.metabolic, <prefix>.tfs and
#  <prefix>.eps, where <prefix> is "up" or "down". Counts are positive for "UP" and
#  negated for "DOWN". Any other expression.change value gives an empty result (NULL)
#side effect:
#  the gene names of the TFs and of the efflux pump genes found in geneSet are printed
#  (the original calls discarded the converted names, so nothing was ever printed)
#relies on the globals tf.names, effluxPump.genes, metabolic.genes and convert.locus.to.gene.name
classify.degs<-function(geneSet,expression.change) 
{
  if(!(expression.change %in% c("UP","DOWN")))
  {
    return(c())
  }
  #TF genes and efflux pump genes (eps) present in the gene set
  tf.hits<-intersect(geneSet,tf.names)
  eps.hits<-intersect(geneSet,effluxPump.genes)
  #print names of the TFs and eps found
  if(length(tf.hits)!=0)
  {
    print(convert.locus.to.gene.name(tf.hits))
  }
  if(length(eps.hits)!=0)
  {
    print(convert.locus.to.gene.name(eps.hits))
  }
  #count metabolic genes and all remaining genes
  n.metabolic<-length(intersect(geneSet,metabolic.genes))
  n.others<-length(geneSet)-(length(tf.hits)+length(eps.hits)+n.metabolic)
  #compile values in a single vector (same column order for both directions)
  counts<-c(n.others,n.metabolic,length(tf.hits),length(eps.hits))
  prefix<-"up"
  if(expression.change=="DOWN")
  {
    #down-regulated counts are stored as negative numbers
    counts<- -1*counts
    prefix<-"down"
  }
  matrix(counts,nrow=1,dimnames=list(NULL,paste(prefix,c("others","metabolic","tfs","eps"),sep=".")))
}
#run differential expression analyses
#WT response to tetracycline: WT(+TET) vs WT(-TET) (columns 1-6 of the expression matrix)
wt.response<-bayesT(handel.normalized.matrix[,1:6],numC=3,numE=3,conf=7,doMulttest=TRUE)
#differentially expressed genes (DEGs): BH below 0.05 and |meanC - meanE| above 1
wt.response.degs<-with(wt.response,rownames(wt.response)[which(BH < 0.05 & abs(meanC - meanE)>1)])
print(paste("WT differentially expressed",length(wt.response.degs),"genes in response to tetracycline"))
#fold-change (meanE - meanC) for all genes, named by gene
wt.response.fold.change<-setNames(wt.response$meanE - wt.response$meanC,rownames(wt.response))
#split the DEGs by the sign of their fold-change
wt.deg.up<-wt.response.degs[which(wt.response.fold.change[wt.response.degs] > 0)]
wt.deg.down<-wt.response.degs[which(wt.response.fold.change[wt.response.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/wt_response_up_080822.csv",wt.deg.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/wt_response_down_080822.csv",wt.deg.down,row.names = F)
#count TFs, efflux pumps, metabolic genes and other genes in each direction
up.wt.vector<-classify.degs(wt.deg.up,"UP")
down.wt.vector<-classify.degs(wt.deg.down,"DOWN")
#differential expression analysis for TetR baseline change: TetR(-TET) vs WT(-TET)
#(columns 1-3 and 7-9 of the expression matrix)
tetR.basal<-bayesT(handel.normalized.matrix[,c(1:3,7:9)],numC=3,numE=3,conf=7,doMulttest=TRUE)
#DEGs: BH below 0.05 and |meanC - meanE| above 1
tetR.basal.degs<-with(tetR.basal,rownames(tetR.basal)[which(BH < 0.05 & abs(meanC - meanE)>1)])
print(paste("TetR (-TET) differentially expressed",length(tetR.basal.degs),"genes with respect to WT(-TET)"))
#fold-change (meanE - meanC) for all genes, named by gene
tetR.basal.fold.change<-setNames(tetR.basal$meanE - tetR.basal$meanC,rownames(tetR.basal))
#split the DEGs by the sign of their fold-change
tetR.basal.degs.up<-tetR.basal.degs[which(tetR.basal.fold.change[tetR.basal.degs] > 0)]
tetR.basal.degs.down<-tetR.basal.degs[which(tetR.basal.fold.change[tetR.basal.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_basal_up_080822.csv",tetR.basal.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_basal_down_080822.csv",tetR.basal.degs.down,row.names = F)
#count TFs, efflux pumps, metabolic genes and other genes in each direction
up.tetR.basal.vector<-classify.degs(tetR.basal.degs.up,"UP")
down.tetR.basal.vector<-classify.degs(tetR.basal.degs.down,"DOWN")
#differential expression analysis for TetR response to tetracycline: TetR(+) vs TetR(-)
#(columns 7-12 of the expression matrix)
tetR.response<-bayesT(handel.normalized.matrix[,7:12],numC=3,numE=3,conf=7,doMulttest=TRUE)
#DEGs: BH below 0.05 and |meanC - meanE| above 1
tetR.response.degs<-with(tetR.response,rownames(tetR.response)[which(BH < 0.05 & abs(meanC - meanE)>1)])
print(paste("TetR differentially expressed",length(tetR.response.degs),"genes in response to tetracycline"))
#fold-change (meanE - meanC) for all genes, named by gene
tetR.response.fold.change<-setNames(tetR.response$meanE - tetR.response$meanC,rownames(tetR.response))
#split the DEGs by the sign of their fold-change
tetR.response.degs.up<-tetR.response.degs[which(tetR.response.fold.change[tetR.response.degs] > 0)]
tetR.response.degs.down<-tetR.response.degs[which(tetR.response.fold.change[tetR.response.degs] < 0)]
#write CSV files with up- and down-regulated genes for downstream analysis on DAVID 
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_response_up_080822.csv",tetR.response.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR_response_down_080822.csv",tetR.response.degs.down,row.names = F)
#count TFs, efflux pumps, metabolic genes and other genes in each direction
up.tetR.response.vector<-classify.degs(tetR.response.degs.up,"UP")
down.tetR.response.vector<-classify.degs(tetR.response.degs.down,"DOWN")
#differential expression analysis for TetR(+TET) vs WT(+TET)
#(columns 4-6 and 10-12 of the expression matrix)
tetR.wt.response.comparison<-bayesT(handel.normalized.matrix[,c(4:6,10:12)],numC=3,numE=3,conf=7,doMulttest=TRUE)
#DEGs: BH below 0.05 and |meanC - meanE| above 1
tetR.wt.response.comparison.degs<-with(tetR.wt.response.comparison,
                                       rownames(tetR.wt.response.comparison)[which(BH < 0.05 & abs(meanC - meanE)>1)])
print(paste("TetR (+TET) differentially expressed",length(tetR.wt.response.comparison.degs),"genes with respect to WT(+TET)"))
#fold-change (meanE - meanC) for all genes, named by gene
tetR.wt.response.comparison.fold.change<-setNames(tetR.wt.response.comparison$meanE - tetR.wt.response.comparison$meanC,
                                                  rownames(tetR.wt.response.comparison))
#split the DEGs by the sign of their fold-change
tetR.wt.response.comparison.degs.up<-tetR.wt.response.comparison.degs[
  which(tetR.wt.response.comparison.fold.change[tetR.wt.response.comparison.degs] > 0)]
tetR.wt.response.comparison.degs.down<-tetR.wt.response.comparison.degs[
  which(tetR.wt.response.comparison.fold.change[tetR.wt.response.comparison.degs] < 0)]
#count TFs, efflux pumps, metabolic genes and other genes in each direction
up.tetR.wt.response.comparison.vector<-classify.degs(tetR.wt.response.comparison.degs.up,"UP")
down.tetR.wt.response.comparison.vector<-classify.degs(tetR.wt.response.comparison.degs.down,"DOWN")
#create Figure 1A
#all-zero row appended after the four comparisons to generate blank space
zero.vector<-rep(0,4)
#fill colors of the four gene categories
colors.barplot<-c("grey",
                  rgb(253,174,97,maxColorValue = 255),
                  rgb(166,217,106,maxColorValue = 255),
                  rgb(215,25,28,maxColorValue = 255))
#one row per comparison; up-regulated counts are positive, down-regulated counts negative
up.deg.counts<-rbind(up.wt.vector,up.tetR.basal.vector,up.tetR.response.vector,up.tetR.wt.response.comparison.vector,zero.vector)
down.deg.counts<-rbind(down.wt.vector,down.tetR.basal.vector,down.tetR.response.vector,down.tetR.wt.response.comparison.vector,zero.vector)
#bars of up-regulated genes are drawn first ...
barplot(t(up.deg.counts),density=rep(15,4),angle=c(0,45,90,135),col=colors.barplot,
        ylim=c(-500,500),ylab="Number of DEGs",main="Fig. 1A")
#... then the bars of down-regulated genes are added to the same plot
barplot(t(down.deg.counts),density=rep(15,4),angle=c(0,45,90,135),col=colors.barplot,add=TRUE)
abline(h=0,col="black",lwd=2)
```
1.3 Create Fig. S1 
```{r}
#Fig. S1 (fold-change of transcript level of fermentation-related genes)
#fermentation-related genes compiled from EcoCyc database (file without header row)
fermentation.ecocyc<-read.csv(file="../Data/Input/fermentation_pathway_ecocyc.csv",header=FALSE)
#function to translate gene name to locus tag
#arguments:
#  geneSet: vector of standard gene names
#returns:
#  character vector with one entry per gene: the second "_"-separated field of the
#  row name of the global transcriptional.network that starts with "<gene>_"
#  (the locus tag). Genes without exactly one such row name are returned unchanged
translate.gene.name.to.locus<-function(geneSet)
{
  network.genes<-rownames(transcriptional.network)
  if(is.null(network.genes))
  {
    network.genes<-character(0)
  }
  vapply(as.character(geneSet),function(g)
  {
    #prefix match: an unanchored search for "adhE_" would also hit e.g. "yadhE_..."
    #and push the gene into the "not exactly one match" fallback
    gene.pos<-which(startsWith(network.genes,paste0(g,"_")))
    if(length(gene.pos)==1)
    {
      strsplit(network.genes[gene.pos],split="_")[[1]][2]
    }
    else
    {
      g
    }
  },character(1),USE.NAMES=FALSE)
}
#convert gene names to locus tags
fermentation.loci<-translate.gene.name.to.locus(fermentation.ecocyc[,1])
#add missing loci
fermentation.loci[15:16]<-c("b1675","b2929")
#create matrix with average expression value for WT and TetR strains in the presence/absence of tetracycline
#(column 1: samples 1-3, column 2: samples 4-6, column 3: samples 7-9, column 4: samples 10-12)
handel.average.expression<-cbind(rowMeans(handel.normalized.matrix[,1:3]),rowMeans(handel.normalized.matrix[,4:6]),rowMeans(handel.normalized.matrix[,7:9]),rowMeans(handel.normalized.matrix[,10:12]))
#define matrix with average expression profile of fermentation genes
fermentation.pathway.average.expression.matrix<-handel.average.expression[as.vector(fermentation.loci),]
#add gene names information as rownames
rownames(fermentation.pathway.average.expression.matrix)<-as.vector(fermentation.ecocyc$V1)
#add column names
colnames(fermentation.pathway.average.expression.matrix)<-c("WT(-TET)","WT(+TET)", "TetR(-TET)", "TetR(+TET)")
#create heatmap displaying log2 fold-change with respect to untreated WT
#heatmap fold-change breaks
heatmap.breaks<-c(-3,-2,-1,-0.5,0,0.5,1,2)
#heatmap color palette
heatmap.colors<-colorRampPalette(rev(brewer.pal(7, "RdBu")) )(8)[8:1]
pheatmap(fermentation.pathway.average.expression.matrix[,2:4]-fermentation.pathway.average.expression.matrix[,1],scale="none",color =heatmap.colors[-5],cluster_rows=F,cluster_cols = F,fontsize = 8,angle_col = 90,
         breaks = heatmap.breaks,legend_breaks=heatmap.breaks,legend=T, main="Fig. S1")
#evaluate significance of fold-change using random permutation
#total permutations
N<-10000
#average fold change of fermentation genes in the baseline change in TetR
true.average.fold.change.fermetation<-mean(fermentation.pathway.average.expression.matrix[,3]-fermentation.pathway.average.expression.matrix[,1])
#average fold change of N random gene sets of the same size (result vector is preallocated)
#NOTE(review): no random seed is set, so the p-value varies slightly between runs
random.average.fold.change<-numeric(N)
for(n in seq_len(N))
{
  random.selected.genes<-sample(1:nrow(handel.normalized.matrix),length(fermentation.loci))
  random.fold.change<-handel.average.expression[random.selected.genes,3]- handel.average.expression[random.selected.genes,1]
  random.average.fold.change[n]<-mean(random.fold.change)
}
#permutation p-value (two-sided): fraction of random sets whose absolute average fold change
#reaches the absolute observed value. The observed value was previously compared without
#abs(), which forces a p-value of 1 whenever the observed average is negative
permutation.pval.fermentation.genes<-sum(abs(random.average.fold.change) >= abs(true.average.fold.change.fermetation),na.rm=TRUE)/N
#sep="" used to be handed to print(), where it has no effect; the printed text is unchanged
print(paste("Permutation p-value =",permutation.pval.fermentation.genes))
1.4 Create Fig. S2 (acrA, acrB and acrZ expression and fitness profile in the presence of tetracycline)
```{r}
#efflux pump genes of interest (locus tags)
acrA<-"b0463"
acrB<-"b0462"
acrZ<-"b0762"
selected.efflux.genes<-c(acrA,acrB,acrZ)
#one color per gene, shared by both panels
selected.gene.colors<-c(rgb(141,160,203,maxColorValue = 255),
                        rgb(252,141,98,maxColorValue = 255),
                        rgb(102,194,165,maxColorValue = 255))
#Fig. S2A - expression profiles of selected genes
#average expression of columns 2-4 minus column 1 (the untreated WT)
selected.genes.average.expression.matrix<-as.matrix(
  handel.average.expression[selected.efflux.genes,2:4]-handel.average.expression[selected.efflux.genes,1])
colnames(selected.genes.average.expression.matrix)<-c("WT(+)","TetR(-)","TetR(+)")
#single-panel layout
par(mfrow=c(1,1))
barplot(selected.genes.average.expression.matrix,beside=TRUE,col=selected.gene.colors,
        ylab="Log2 fold-change (vs. WT control)",ylim=c(-0.5,1.5),main="Fig. S2A")
legend("topleft",fill=selected.gene.colors,legend=c("AcrA","AcrB","AcrZ"))
#Fig. S2B
#fitness data from Nichols et al. (Cell 2011)
fitness.scores<-read.xls("../Data/Miscellaneous_files/phenotipic landscape table s2.xls",header=TRUE,fill=TRUE)
#columns whose name contains TETRACYCLINE
tetracycline.conditions<-grep("TETRACYCLINE",colnames(fitness.scores))
#boxplot of rows 3965, 1278 and 2819 of the fitness table (labelled acrA*, acrB* and acrZ)
boxplot(t(fitness.scores[c(3965,1278,2819),tetracycline.conditions]),
        names=c("acrA*","acrB*","acrZ"),col=selected.gene.colors,ylim=c(-12,2),
        outline=TRUE,boxlty=1,whisklty = 1, staplelty = 1,frame=FALSE,
        ylab="Deletion fitness score in tetracycline",cex.lab=1.1,main="Fig. S2B")
abline(h=0,lty=2)
```
1.5 Create Fig. 1B and accompanying Table S2
```{r}
#differential expression analysis for TetR(+TET) vs WT(-TET)
#(columns 1-3 and 10-12 of the expression matrix)
tetR.plus.vs.WT.minus<-bayesT(handel.normalized.matrix[,c(1:3,10:12)],numC=3,numE=3,conf=7,doMulttest=TRUE)
#DEGs: BH below 0.05 and |meanC - meanE| above 1
tetR.plus.vs.WT.minus.degs<-with(tetR.plus.vs.WT.minus,
                                 rownames(tetR.plus.vs.WT.minus)[which(BH < 0.05 & abs(meanC - meanE)>1)])
print(paste("TetR(+TET) differentially expressed",length(tetR.plus.vs.WT.minus.degs),"genes with respect to WT(-TET)"))
#fold-change (meanE - meanC) for all genes, named by gene
tetR.plus.vs.WT.minus.fold.change<-setNames(tetR.plus.vs.WT.minus$meanE - tetR.plus.vs.WT.minus$meanC,
                                            rownames(tetR.plus.vs.WT.minus))
#split the DEGs by the sign of their fold-change
tetR.plus.vs.WT.minus.degs.up<-tetR.plus.vs.WT.minus.degs[
  which(tetR.plus.vs.WT.minus.fold.change[tetR.plus.vs.WT.minus.degs] > 0)]
tetR.plus.vs.WT.minus.degs.down<-tetR.plus.vs.WT.minus.degs[
  which(tetR.plus.vs.WT.minus.fold.change[tetR.plus.vs.WT.minus.degs] < 0)]
#save CSV files with up- and down-regulated genes for downstream DAVID analysis
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR+TET_WT-TET_upregulated_081522.csv",tetR.plus.vs.WT.minus.degs.up,row.names = F)
#write.csv(file="../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Input/tetR+TET_WT-TET_downregulated_081522.csv",tetR.plus.vs.WT.minus.degs.down,row.names = F)
#Fig. 1B - Heatmap for selected functional terms
#every DAVID output file (i.e. functional enrichment) is read, and the terms with
#Benjamini below 0.05 are kept as the significant terms of that gene set
#WT response, down-regulated genes
david.wt.response.down<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_wt_response_down.csv",header=TRUE)
significant.terms.wt.response.down<-with(david.wt.response.down,as.character(Term[which(Benjamini < 0.05)]))
#WT response, up-regulated genes
david.wt.response.up<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_wt_response_up.csv",header=TRUE)
significant.terms.wt.response.up<-with(david.wt.response.up,as.character(Term[which(Benjamini < 0.05)]))
#TetR baseline changes, down-regulated genes
david.tetR.basal.down<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_basal_down.csv",header=TRUE)
significant.terms.tetR.basal.down<-with(david.tetR.basal.down,as.character(Term[which(Benjamini < 0.05)]))
#TetR baseline changes, up-regulated genes
david.tetR.basal.up<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_basal_up.csv",header=TRUE)
significant.terms.tetR.basal.up<-with(david.tetR.basal.up,as.character(Term[which(Benjamini < 0.05)]))
#TetR(+TET) vs WT(-TET), down-regulated genes
david.tetR.response.down<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_plus_vs_wt_minus_down.csv",header=TRUE)
significant.terms.tetR.response.down<-with(david.tetR.response.down,as.character(Term[which(Benjamini < 0.05)]))
#TetR(+TET) vs WT(-TET), up-regulated genes
david.tetR.response.up<-read.csv("../Data/Differential_expression_analysis/DAVID_enrichment_analysis/Output/Latest_output_2022/david_tetR_plus_vs_wt_minus_up.csv",header=TRUE)
significant.terms.tetR.response.up<-with(david.tetR.response.up,as.character(Term[which(Benjamini < 0.05)]))
#vector with all over-represented terms: the six term sets are merged in the order
#in which they were read
all.enriched.terms<-Reduce(union,list(significant.terms.wt.response.down,
                                      significant.terms.wt.response.up,
                                      significant.terms.tetR.basal.down,
                                      significant.terms.tetR.basal.up,
                                      significant.terms.tetR.response.down,
                                      significant.terms.tetR.response.up))
#matrix with one row per enriched term and one column per comparison against the untreated WT strain
functional.enrichment.matrix<-matrix(0,nrow=length(all.enriched.terms),ncol=3,
                                     dimnames = list(all.enriched.terms,c("WT(+)","TetR(-)","TetR(+)")))
#significant term sets of every column: WT(+) vs WT(-), TetR(-) vs WT(-) and TetR(+) vs WT(-)
terms.by.comparison<-list(
  "WT(+)"=list(down=significant.terms.wt.response.down,up=significant.terms.wt.response.up),
  "TetR(-)"=list(down=significant.terms.tetR.basal.down,up=significant.terms.tetR.basal.up),
  "TetR(+)"=list(down=significant.terms.tetR.response.down,up=significant.terms.tetR.response.up))
#terms of down-regulated genes are marked with -1 and terms of up-regulated genes with 1
#(the up mark is written last, so it wins for a term present in both sets)
for(comparison in names(terms.by.comparison))
{
  functional.enrichment.matrix[terms.by.comparison[[comparison]]$down,comparison]<- -1
  functional.enrichment.matrix[terms.by.comparison[[comparison]]$up,comparison]<- 1
}
#sub-matrix with the manually selected functional terms (for Fig. 1B)
selected.term.rows<-c(1,15,19,26,31,23,25,28,17,39,38,71,81,51,41,48,75,77,74,49,50,61,52,58,73,82,80,84,87:89)
matrix.selected.terms<-functional.enrichment.matrix[selected.term.rows,]
#Fig. 1B
pheatmap(t(matrix.selected.terms),color =bluered(3)[3:1],scale = "none",
         cluster_rows=FALSE,cluster_cols = FALSE,fontsize = 8,angle_col = 90,main="Fig. 1B")
#create table S2
#one row per enriched term, initialized with zeros
tableS2.matrix<-matrix(0,nrow=length(all.enriched.terms),ncol=5,
                       dimnames = list(all.enriched.terms,c("WT(+)","TetR(-)","TetR(+)","Total number of genes associated with term","P-value")))
#term sets used to fill the table, in filling order; count.column is the table column
#that receives the "Count" value of the DAVID output
tableS2.sources<-list(
  #WT response: down-regulated terms, then up-regulated terms
  list(terms=significant.terms.wt.response.down,david=david.wt.response.down,count.column=1),
  list(terms=significant.terms.wt.response.up,david=david.wt.response.up,count.column=1),
  #TetR basal response: down-regulated terms, then up-regulated terms
  list(terms=significant.terms.tetR.basal.down,david=david.tetR.basal.down,count.column=2),
  list(terms=significant.terms.tetR.basal.up,david=david.tetR.basal.up,count.column=2))
for(source.info in tableS2.sources)
{
  for(t in source.info$terms)
  {
    term.position<-which(source.info$david$Term == t)
    #fill row information: "Count" in the comparison column and "Pop.Hits" in column 4
    tableS2.matrix[t,c(source.info$count.column,4)]<- as.matrix(source.info$david[term.position,c("Count","Pop.Hits")])
    #add p-value information ("Benjamini" column): the first p-value of a term replaces
    #the initial 0, later ones are appended after a ";"
    #(appending text turns the whole matrix into a character matrix)
    term.pval<-source.info$david[term.position,"Benjamini"]
    if(tableS2.matrix[t,5]==0)
    {
      tableS2.matrix[t,5]<-term.pval
    }
    else
    {
      tableS2.matrix[t,5]<-paste(tableS2.matrix[t,5],term.pval,sep=";")
    }
  }
}
 #fill relevant information for TetR(+TET) vs WT(-TET) 
 #down-regulated terms  
 for(t in significant.terms.tetR.response.down)
 {
   term.position<-which(david.tetR.response.down$Term == t)
   tableS2.matrix[t,c(3,4)]<- as.matrix(david.tetR.response.down[term.position,c("Count","Pop.Hits")])
   #add p-value information
   term.pval<-david.tetR.response.down[term.position,"Benjamini"]
   if(tableS2.matrix[t,5]==0)
   {
     tableS2.matrix[t,5]<-term.pval
   }
   else
   {
     tableS2.matrix[t,5]<-paste(tableS2.matrix[t,5],term.pval,sep=";")
   }
 }
 #for up-regulated terms  
 for(t in significant.terms.tetR.response.up)
 {
   term.position<-which(david.tetR.response.up$Term == t)
   tableS2.matrix[t,c(3,4)]<- as.matrix(david.tetR.response.up[term.position,c("Count","Pop.Hits")])
   #add p-value information
   term.pval<-david.tetR.response.up[term.position,"Benjamini"]
   if(tableS2.matrix[t,5]==0)
   {
    tableS2.matrix[t,5]<-term.pval
   }
   else
   {
     tableS2.matrix[t,5]<-paste(tableS2.matrix[t,5],term.pval,sep=";")
   }
 }
print(tableS2.matrix)
```
1.5 Fig. 1C - Expression profile of aerobic respiration-related genes
```{r}
#load the list of TCA cycle, electron transport chain and ATP synthase genes (columns used: Locus, Name)
aerobic.respiration.table <- read.csv("../Data/Input/aerobic_respiration_genes.csv")
aerobic.respiration.loci <- aerobic.respiration.table$Locus
#average expression of these loci, with rows relabelled by gene name
aerobic.respiration.average.expression <- handel.average.expression[as.vector(aerobic.respiration.loci), ]
rownames(aerobic.respiration.average.expression) <- aerobic.respiration.table$Name
#difference of columns 2-4 with respect to column 1 of the average expression matrix
aerobic.respiration.change <- aerobic.respiration.average.expression[, 2:4] - aerobic.respiration.average.expression[, 1]
#define heatmap breaks (seven breaks delimit six colour bins)
heatmap.breaks <- c(-4, -2, -1, 0, 0.5, 1, 1.5)
#six-colour RdBu palette: reversed, interpolated, then reversed again
fig1c.palette <- rev(colorRampPalette(rev(brewer.pal(6, "RdBu")))(6))
pheatmap(aerobic.respiration.change,
         scale = "none",
         color = fig1c.palette,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         fontsize = 8,
         angle_col = 90,
         breaks = heatmap.breaks,
         legend_breaks = heatmap.breaks,
         legend = TRUE,
         main = "Fig. 1C")
```
2.1 Table 1
```{r}
#function to generate input files for NetSurgeon algorithm (Brent et al. PNAS 2016)
#arguments:
#  input.network         - transcriptional network (rows: target genes, columns: regulators); the second
#                          '_'-separated field of each row/column name is used as the locus tag
#  bayesian.ttest.output - differential expression table with locus tags as row names;
#                          columns used: BH, meanC, meanE, fold
#  name.output.file      - prefix of the generated file names
#side effects: writes <prefix>_<date>.mtr, .vect, .tfs and .orfs into
#              ../Data/Network_analysis/NetSurgeon/Input/
create.netSurgeon.input<-function(input.network,bayesian.ttest.output,name.output.file)
{
  #keep the second '_'-separated field of each identifier
  extract.locus <- function(identifiers)
  {
    vapply(strsplit(identifiers, split = "_"), function(parts) parts[2], character(1))
  }
  #change gene names (for both rows and columns) to loci in the input network
  rownames(input.network) <- extract.locus(rownames(input.network))
  colnames(input.network) <- extract.locus(colnames(input.network))
  #define genes present in both datasets (i.e., expression matrix and transcriptional network)
  genes.present.in.both.datasets <- intersect(rownames(bayesian.ttest.output), rownames(input.network))
  #filter the transcriptional network accordingly
  input.network <- input.network[genes.present.in.both.datasets, , drop = FALSE]
  #remove from the TRN those TFs with less than five targets
  #(guarded: a negative index built from an empty which() would remove every column)
  tfs.with.few.targets <- which(colSums(abs(input.network)) < 5)
  if (length(tfs.with.few.targets) > 0)
  {
    input.network <- input.network[, -tfs.with.few.targets, drop = FALSE]
  }
  #common prefix of all output files (date evaluated once so that the four files share it)
  output.prefix <- paste("../Data/Network_analysis/NetSurgeon/Input/", name.output.file, "_", Sys.Date(), sep = "")
  #create input network for NetSurgeon
  #the format of the input network is TFs x Genes
  write.table(t(input.network), col.names = FALSE, row.names = FALSE, quote = FALSE,
              file = paste(output.prefix, ".mtr", sep = ""))
  targets <- rownames(input.network)
  #extract q-values from Bayesian T-tests
  q.values <- bayesian.ttest.output[targets, "BH"]
  #replace zeroes with minimum q-value among the non-zero values to avoid infinity during the log conversion
  position.zero.q.values <- which(q.values == 0)
  if (length(position.zero.q.values) > 0)
  {
    #na.rm: a missing q-value elsewhere must not turn the replacement value into NA
    q.values[position.zero.q.values] <- min(q.values[-position.zero.q.values], na.rm = TRUE)
  }
  #convert q-values of not differentially expressed genes to 1
  #(q-value above 0.05 or absolute difference between meanC and meanE below 1)
  not.differentially.expressed <- which(q.values > 0.05 |
                                          abs(bayesian.ttest.output[targets, "meanC"] - bayesian.ttest.output[targets, "meanE"]) < 1)
  q.values[not.differentially.expressed] <- 1
  #estimate the sign of fold-change for each gene
  fold.change.sign <- sign(bayesian.ttest.output[targets, "fold"])
  #sign -log q-values based on fold change direction
  DEvector <- fold.change.sign * -log2(q.values)
  #remove any NA
  DEvector[is.na(DEvector)] <- 0
  #save input files: differential expression vector, regulators and targets
  write.table(DEvector, sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE,
              file = paste(output.prefix, ".vect", sep = ""))
  regulators <- colnames(input.network)
  write.table(regulators, sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE,
              file = paste(output.prefix, ".tfs", sep = ""))
  write.table(targets, sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE,
              file = paste(output.prefix, ".orfs", sep = ""))
}
#generate NetSurgeon input files for the three comparisons:
#WT response to tetracycline, TetR at baseline and TetR response to tetracycline
create.netSurgeon.input(input.network = transcriptional.network,
                        bayesian.ttest.output = wt.response,
                        name.output.file = "WT_TET")
create.netSurgeon.input(input.network = transcriptional.network,
                        bayesian.ttest.output = tetR.basal,
                        name.output.file = "TetR_basal")
create.netSurgeon.input(input.network = transcriptional.network,
                        bayesian.ttest.output = tetR.response,
                        name.output.file = "TetR_response")
#read compiled NetSurgeon output to create Table 1 (only top 15 TFs for each comparison were considered as differentially active)
netsurgeon.output <- read.csv("../Data/Network_analysis/NetSurgeon/Output/table1_raw_compiled_NetSurgeon_output.csv", header = TRUE)
#copy of the transcriptional network whose row and column names are reduced to locus tags
#(second '_'-separated field of the original names)
transcriptional.network.loci.tags <- transcriptional.network
rownames(transcriptional.network.loci.tags) <- vapply(strsplit(rownames(transcriptional.network.loci.tags), split = "_"),
                                                      function(fields) fields[2], character(1))
colnames(transcriptional.network.loci.tags) <- vapply(strsplit(colnames(transcriptional.network.loci.tags), split = "_"),
                                                      function(fields) fields[2], character(1))
#for one TF, count: targets present in the expression matrix, targets among the TetR baseline DEGs
#and targets among the TetR adaptive-response DEGs
count.regulon.targets <- function(tf.locus.tag)
{
  tf.targets <- names(which(transcriptional.network.loci.tags[, tf.locus.tag] != 0))
  c(length(intersect(tf.targets, rownames(handel.normalized.matrix))),
    length(intersect(tf.targets, tetR.basal.degs)),
    length(intersect(tf.targets, tetR.response.degs)))
}
#one row per TF listed in the NetSurgeon output
temporal.table <- t(vapply(as.character(netsurgeon.output$Locus.tag), count.regulon.targets, integer(3), USE.NAMES = FALSE))
colnames(temporal.table) <- c("Regulon size", "Targets-basal", "Targets-adaptive")
#combine netsurgeon output (without its columns 2 to 4) with the regulon counts
table1 <- cbind(netsurgeon.output[, -(2:4)], temporal.table)
#prepend the standard gene name of each TF
table1 <- cbind(convert.locus.to.gene.name(table1[, "Locus.tag"]), table1)
colnames(table1)[1] <- "Transcription factor"
#print Table 1 sorted by decreasing regulon size
print(table1[order(table1[, "Regulon size"], decreasing = TRUE), ])
#evaluate how many DEGs are controlled by the 25 TFs
#all genes differentially expressed in the TetR strain background (baseline combined with adaptive response)
tetR.DEGs <- union(tetR.basal.degs, tetR.response.degs)
#compile genes that are differentially expressed by TetR and are regulated by the 25 TFs in NetSurgeon output
genes.affected.by.tetR.phenotype <- c()
for (tf in table1[, "Locus.tag"])
{
  #define members of current TF regulon that are differentially expressed in basal or adaptive response
  de.targets.tetR <- intersect(names(which(transcriptional.network.loci.tags[, tf] != 0)), tetR.DEGs)
  #add genes into compilation set
  genes.affected.by.tetR.phenotype <- union(genes.affected.by.tetR.phenotype, de.targets.tetR)
}
print(length(genes.affected.by.tetR.phenotype))
#similar analysis but counting DEGs controlled by the 15 TFs associated with TetR response to tetracycline (i.e., adaptive TFs)
#define adaptive TFs: every TF whose 'Response' entry is not "Basal"
#(a logical mask is used because a negative index built from an empty which() would select no TF at all)
adaptive.tfs <- table1[!(table1[, "Response"] %in% "Basal"), "Locus.tag"]
#vector with members of adaptive TFs that were differentially expressed by TetR in response to tetracycline
target.genes.tetR.response <- c()
#vector with all members of adaptive TF regulons (restricted to genes present in the expression matrix)
adaptive.tfs.total.targets <- c()
for (tf in adaptive.tfs)
{
  #define current TF regulon
  current.tf.regulon <- names(which(transcriptional.network.loci.tags[, tf] != 0))
  #define members of current TF regulon that were differentially expressed as part of TetR adaptive response
  de.targets.adaptive <- intersect(current.tf.regulon, tetR.response.degs)
  #add genes into corresponding sets
  target.genes.tetR.response <- union(target.genes.tetR.response, de.targets.adaptive)
  adaptive.tfs.total.targets <- union(adaptive.tfs.total.targets,
                                      intersect(current.tf.regulon, rownames(handel.normalized.matrix)))
}
print(length(target.genes.tetR.response))
#compute hyper-geometric test p-value: P(X >= observed overlap), hence the '-1' with lower.tail = FALSE
#white balls: targets of adaptive TFs; black balls: remaining genes of the expression matrix; draws: TetR response DEGs
phyper(length(target.genes.tetR.response) - 1,
       length(adaptive.tfs.total.targets),
       nrow(handel.normalized.matrix) - length(adaptive.tfs.total.targets),
       length(tetR.response.degs),
       lower.tail = FALSE)
```
2.2 Fig 2A: interactions among differentially active TFs
```{r}
#build the edge list of regulatory interactions among the TFs listed in Table 1
#result: character matrix 'tf.tf.network' with one row per edge and columns Regulator, Target, Sign
#initialize TF-TF network (rows are appended inside the loop)
tf.tf.network<-c()
#add TF-TF interactions that involve TFs included in Table 1
for(tf in 1:nrow(table1))
{
  #current TF: locus tag (used to index the network) and gene name (used as edge label)
  tf.locus<-as.character(table1[tf,"Locus.tag"])
  tf.name<-as.character(table1[tf,"Transcription factor"])
  #TF regulon: row names with a non-zero entry in this TF's column
  tf.regulon<-names(which(transcriptional.network.loci.tags[,tf.locus]!=0))
  #target TFs - regulon members that are themselves in Table 1, excluding autoregulation
  target.tfs<-setdiff(intersect(tf.regulon,as.character(table1[,"Locus.tag"])),tf.locus)
  #add relevant interactions
  if(length(target.tfs) > 0)
  {
    #sign (activation or repression) of TF-TF interactions: 1 or -1
    interaction.signs<-sign(transcriptional.network.loci.tags[as.character(target.tfs),tf.locus])
    #expand the TF-TF network: regulator name, target name (converted from locus tag), sign
    #cbind of character and numeric columns stores the sign as text
    tf.tf.network<-rbind(tf.tf.network,cbind(rep(tf.name,length(target.tfs)),convert.locus.to.gene.name(target.tfs),interaction.signs))
  }
}
#add column names
#NOTE(review): if no TF-TF interaction was found, tf.tf.network is still NULL here and this assignment fails
colnames(tf.tf.network)<-c("Regulator","Target","Sign")
#extend the TF-TF network to take into account subset of target genes that were differentially expressed (DE) in TetR response
#focused on the 15 TFs associated with TetR adaptive response and their DE targets
#sub-network: rows are the DE targets, columns are the adaptive TFs
#NOTE(review): adaptive.tfs is used directly as a column index; if it were a factor, R would index by its integer codes - confirm it is character
temporal.subnetwork.matrix<-transcriptional.network.loci.tags[as.character(target.genes.tetR.response),adaptive.tfs]
#define unique profiles in transcriptional subnetwork (i.e., keeping a single row to represent all genes with the same regulators in the temporal.subnetwork.matrix)
unique.gene.profiles<-unique.matrix(temporal.subnetwork.matrix)
#groups of genes with same regulators (one list element per retained group)
gene.groups<-list()
#regulators of each gene group (edge list: regulator, group label, group size)
tf.groups<-c()
for (r in 1:nrow(unique.gene.profiles))
{
  temporal.gene.group<-c()
  current.profile<-unique.gene.profiles[r,]
  #define which genes in the temporal.subnetwork.matrix have the same profile (i.e., same regulators)
  for(g in target.genes.tetR.response)
  {
    #TRUE only when every entry of gene g's row equals the corresponding entry of the current profile
    if(apply(temporal.subnetwork.matrix[g,]==t(current.profile),1,all))
    {
      temporal.gene.group<-c(temporal.gene.group,g)
    }
  }
  #only save groups with more than five genes (i.e., six or more)
  if(length(temporal.gene.group)>5)
  {
    gene.groups[[length(gene.groups)+1]]<-temporal.gene.group
    #regulators of the group: TFs with a non-zero entry in the profile, converted to gene names
    group.regulators<-convert.locus.to.gene.name(names(which(current.profile!=0)))
    #one edge per regulator pointing to the node "Group<k>"; third column stores the group size
    tf.groups<-rbind(tf.groups,cbind(group.regulators,                              rep(paste("Group",length(gene.groups),sep=""),length(group.regulators)),length(temporal.gene.group)))
   #save the members of the group to a dated CSV file
   write.csv(file=paste("../Data/Network_analysis/NetSurgeon/Output/temporal_coregulated_gene_groups_adaptive_response_g",length(gene.groups),"_",Sys.Date(),".csv",sep=""),gene.groups[[length(gene.groups)]])
    }
}
#add column names
#NOTE(review): fails if no group exceeded five genes, because tf.groups is then still NULL
colnames(tf.groups)<-c("Regulator","Target","#number of genes")
#merge the gene clusters with the TF-TF network (regulator and target columns only; signs are dropped)
tf.tf.network.unsigned<-rbind(tf.tf.network[,1:2],tf.groups[,1:2])
#plot resulting gene network - Fig. 2A draft. 
#the TF-TF network in manuscript was visualized with Cytoscape
#vertices: every name appearing as regulator or target; edges are directed regulator -> target
tf.tf.network.igraph.format<-graph_from_data_frame(tf.tf.network.unsigned                                              ,union(tf.tf.network.unsigned[,"Regulator"],tf.tf.network.unsigned[,"Target"]), directed = T)
selected.network.layout <- layout_nicely(tf.tf.network.igraph.format)
plot(tf.tf.network.igraph.format,layout = selected.network.layout, edge.arrow.size =0.2,vertex.label.cex=0.5,vertex.size=13,main="Fig. 2A (draft)")
#complement NetSurgeon results with analysis of E. coli EGRIN2 model (Brooks et al. 2014, MSB)
#read EGRIN2 corems (in simplified terms, a corem is a group of co-regulated genes)
ecoli.corems.table <- read.csv("../Data/Network_analysis/EGRIN_model/ecoli_egrin2_corems.csv")
#each table row is one corem; its member genes are stored as a ';'-separated string in column 'Genes'
ecoli.corems <- strsplit(as.character(ecoli.corems.table$Genes), split = ";", fixed = TRUE)
#number of genes in each corem
corems.size <- lengths(ecoli.corems, use.names = FALSE)
#all genes included in the E. coli EGRIN model (each gene listed once, in order of first appearance)
ecoli.egrin2.genes <- unique(unlist(ecoli.corems, use.names = FALSE))
#name corems as c1, c2, etc.
names(ecoli.corems) <- paste("c", seq_along(ecoli.corems), sep = "")
#function to identify corems enriched with a particular (user-specified) set of genes
#arguments:
#  corem.set - named list of character vectors, one vector of member genes per corem
#  gene.set  - character vector with the genes of interest
#the gene universe is taken from the global 'ecoli.egrin2.genes'
#returns a matrix with one row per enriched corem (possibly zero rows) and columns
#  q (overlap), m (corem size), n (universe size minus m), k (genes of interest in the universe),
#  p_value (hypergeometric P(X >= q)) and p_value.BH (Benjamini-Hochberg adjusted)
#a corem is reported when p_value.BH <= 0.05 and it has ten or more members
evaluate.corem.enrichment<-function(corem.set,gene.set)
{
  #matrix with p-values
  corem.enrichment <- matrix(nrow = length(corem.set), ncol = 5,
                             dimnames = list(names(corem.set), c('q', 'm', 'n', 'k', 'p_value')))
  #number of genes of interest that belong to the universe (same for every corem)
  k <- length(intersect(gene.set, unique(ecoli.egrin2.genes)))
  for (x in seq_along(corem.set))
  {
    q <- length(intersect(corem.set[[x]], gene.set))
    m <- length(corem.set[[x]])
    n <- length(ecoli.egrin2.genes) - m
    pval <- NA
    #if the intersect is bigger than zero
    if (q > 0)
    {
      #hypergeometric test p-value for observing q or more genes of interest in the corem;
      #phyper(x, ..., lower.tail = FALSE) returns P(X > x), so q - 1 is required to include q itself
      pval <- phyper(q - 1, m, n, k, lower.tail = FALSE)
    }
    #add values to matrix
    corem.enrichment[x, ] <- c(q, m, n, k, pval)
  }
  #perform multiple hypotheses testing correction (corems without overlap keep NA but count as tests)
  corem.enrichment <- cbind(corem.enrichment,
                            p_value.BH = p.adjust(corem.enrichment[, 'p_value'], method = 'BH', n = nrow(corem.enrichment)))
  #select enriched corems with ten or more members
  final.enriched.corems <- rownames(corem.enrichment)[which(corem.enrichment[, 'p_value.BH'] <= 0.05 & corem.enrichment[, 'm'] > 9)]
  #drop = FALSE keeps a matrix (and its row names) even when a single corem is enriched
  corem.enrichment[final.enriched.corems, , drop = FALSE]
}
#EGRIN corems (also known as modules) enriched with DEGs of the TetR baseline and adaptive responses
modules.enriched.with.tetR.baseline <- evaluate.corem.enrichment(ecoli.corems, tetR.basal.degs)
modules.enriched.with.tetR.adaptive <- evaluate.corem.enrichment(ecoli.corems, tetR.response.degs)
#TFs whose regulon is over-represented in at least one corem enriched with TetR baseline changes
corems.enriched.with.tetR.basal.TF.regulons <- c()
#TFs whose regulon is over-represented in at least one corem enriched with the TetR adaptive response
corems.enriched.with.tetR.adaptive.TF.regulons <- c()
#genes in corems enriched with both TetR DEGs AND targets of NetSurgeon predicted TFs
corems.enriched.with.tf.regulons.and.degs <- c()
#IDs of corems enriched with both gene sets
significant.corems <- c()
for (tf in table1[, "Locus.tag"])
{
  #corems enriched with the regulon of the current TF
  current.regulon <- names(which(transcriptional.network.loci.tags[, tf] != 0))
  temporal.enrichment.information <- evaluate.corem.enrichment(ecoli.corems, current.regulon)
  regulon.corem.ids <- rownames(temporal.enrichment.information)
  #corems shared with the TetR baseline response
  intersect.corems <- intersect(rownames(modules.enriched.with.tetR.baseline), regulon.corem.ids)
  if (length(intersect.corems) > 0)
  {
    corems.enriched.with.tetR.basal.TF.regulons <- c(corems.enriched.with.tetR.basal.TF.regulons, tf)
    #record the members and the IDs of the shared corems
    corems.enriched.with.tf.regulons.and.degs <- union(corems.enriched.with.tf.regulons.and.degs,
                                                       unlist(ecoli.corems[intersect.corems], use.names = FALSE))
    significant.corems <- union(significant.corems, intersect.corems)
  }
  #corems shared with the TetR adaptive response
  intersect.corems <- intersect(rownames(modules.enriched.with.tetR.adaptive), regulon.corem.ids)
  if (length(intersect.corems) > 0)
  {
    corems.enriched.with.tetR.adaptive.TF.regulons <- c(corems.enriched.with.tetR.adaptive.TF.regulons, tf)
    corems.enriched.with.tf.regulons.and.degs <- union(corems.enriched.with.tf.regulons.and.degs,
                                                       unlist(ecoli.corems[intersect.corems], use.names = FALSE))
    significant.corems <- union(significant.corems, intersect.corems)
  }
}
#final set of TFs identified by both NetSurgeon and the EGRIN-based analysis
tfs.detected.by.egrin.analysis <- union(corems.enriched.with.tetR.basal.TF.regulons,
                                        corems.enriched.with.tetR.adaptive.TF.regulons)
print(convert.locus.to.gene.name(tfs.detected.by.egrin.analysis))
#identify TFs with differential regulatory activity (alternative approach)
#we first estimate TF activity
genes.present.in.transcriptional.network.and.expression.matrix <- intersect(rownames(transcriptional.network.loci.tags),
                                                                            rownames(handel.normalized.matrix))
final.trn.network <- transcriptional.network.loci.tags[genes.present.in.transcriptional.network.and.expression.matrix, ]
#remove TFs with less than five target genes
#(guarded: a negative index built from an empty which() would remove every TF)
tfs.with.few.targets <- which(colSums(abs(final.trn.network)) < 5)
if (length(tfs.with.few.targets) > 0)
{
  final.trn.network <- final.trn.network[, -tfs.with.few.targets, drop = FALSE]
}
#keep only shared genes in the expression matrix
tetracycline.expression.matrix <- as.matrix(handel.normalized.matrix[genes.present.in.transcriptional.network.and.expression.matrix, ])
#compute pseudoinverse of the transcriptional network (genes x TFs -> TFs x genes)
pseudoinverse.matrix <- pseudoinverse(final.trn.network)
#compute TF activities (TFs x samples)
estimated.tf.activities <- pseudoinverse.matrix %*% tetracycline.expression.matrix
rownames(estimated.tf.activities) <- colnames(final.trn.network)
#perform t-test to compare estimated TFAs across strains/conditions
#vector with p-values (initialized here, not filled in this section)
tfa.pvals <- c()
#vector with regulon size (initialized here, not filled in this section)
regulon.size <- c()
#NOTE(review): sample columns are addressed by position (1:3, 7:9, 10:12); presumably 1:3 = WT(-TET),
#7:9 = TetR(-TET) and 10:12 = TetR(+TET) - confirm against the column order of handel.normalized.matrix
tf.rows <- seq_len(nrow(estimated.tf.activities))
#p-values for TetR basal response: TetR(-TET) vs WT(-TET)
pvalues.activity.tetR.basaline <- vapply(tf.rows, function(r) {
  t.test(estimated.tf.activities[r, 7:9], estimated.tf.activities[r, 1:3])$p.value
}, numeric(1))
#p-values for TetR adaptive response: TetR(+TET) vs TetR(-TET)
pvalues.activity.tetR.adaptive <- vapply(tf.rows, function(r) {
  t.test(estimated.tf.activities[r, 7:9], estimated.tf.activities[r, 10:12])$p.value
}, numeric(1))
#perform multiple hypothesis correction (Benjamini-Hochberg) and append raw and adjusted p-values
ttest.activities.matrix <- cbind(estimated.tf.activities,
                                 pvalues.activity.tetR.basaline,
                                 p.adjust(pvalues.activity.tetR.basaline, method = "BH"),
                                 pvalues.activity.tetR.adaptive,
                                 p.adjust(pvalues.activity.tetR.adaptive, method = "BH"))
#name the four appended columns (they follow the sample columns, i.e. columns 13 to 16 for 12 samples)
pvalue.column.names <- c("p.value.baseline", "adj.p.value.baseline", "p.value.adaptive", "adj.p.value.adaptive")
colnames(ttest.activities.matrix)[ncol(estimated.tf.activities) + 1:4] <- pvalue.column.names
#Check change in activity of the 25 TFs previously identified by NetSurgeon
print(cbind(convert.locus.to.gene.name(table1[, "Locus.tag"]),
            round(ttest.activities.matrix[as.character(table1[, "Locus.tag"]), pvalue.column.names], digits = 4)))
```
2.3 Fig. 2B - Fold change of selected TF regulons
```{r}
#define matrix with (average) fold-change values with respect to untreated WT
#column 1 of handel.average.expression is subtracted from each of columns 2 to 4,
#so the result has three columns (labelled WT(+), TetR(-) and TetR(+) in the plots of this section)
handel.foldchange.matrix<-handel.average.expression[,2:4]-handel.average.expression[,1]
#function to compute significance of fold-change of a given set of genes vs random selection
#arguments:
#  true.genes                   - rows of handel.foldchange.matrix that form the gene set of interest
#  column.of.fold.change.matrix - column of handel.foldchange.matrix to evaluate
#reads the global 'handel.foldchange.matrix'
#the mean fold-change of the gene set is compared with the means of N = 10000 random gene sets of the
#same size (rows sampled without replacement)
#returns the smallest of three empirical proportions: random mean >= observed, random mean <= observed,
#and |random mean| >= |observed|
permutation.function<-function(true.genes,column.of.fold.change.matrix)
{
  #number of random gene sampling
  N <- 10000
  true.average.fold.change <- mean(handel.foldchange.matrix[true.genes, column.of.fold.change.matrix])
  number.of.genes <- length(true.genes)
  candidate.rows <- seq_len(nrow(handel.foldchange.matrix))
  #mean fold-change of each random gene set
  random.compilation <- vapply(seq_len(N), function(iteration) {
    mean(handel.foldchange.matrix[sample(candidate.rows, number.of.genes), column.of.fold.change.matrix])
  }, numeric(1))
  #empirical proportions; all three are divided by N so that they are on the same scale
  upper.tail <- sum(random.compilation >= true.average.fold.change, na.rm = TRUE) / N
  lower.tail <- sum(random.compilation <= true.average.fold.change, na.rm = TRUE) / N
  two.sided <- sum(abs(random.compilation) >= abs(true.average.fold.change), na.rm = TRUE) / N
  #p-value estimation (minimum of one-tailed vs two-tailed test)
  estimated.pvalue <- min(c(upper.tail, lower.tail, two.sided))
  #return estimated p-value
  estimated.pvalue
}
#generate boxplots for selected TFs (predicted as differentially active based on network analyses above)
#for each TF: collect its DE targets, draw a boxplot of their fold-changes in the three comparisons
#and print a permutation p-value for each comparison
selected.tfs<-c("arcA","marA","gadE")
for(tf.name in selected.tfs)
{
  #translate.gene.name.to.locus is not defined in this section - presumably it maps a gene name to its locus tag
  tf.locus<-translate.gene.name.to.locus(tf.name)
  if(tf.name=="arcA")
  {
   #focus on ArcA repressed genes 
   #ArcA is mainly a repressor: it represses 73.2% of its targets
   #furthermore, 80% of ArcA DE target genes are repressed by ArcA
   #repressed targets: negative entries in the TF's column of the network
   current.regulon.repression<-names(which(final.trn.network[,tf.locus]<0))
   #focus on DEGs (either in the TetR baseline or adaptive response)
   current.regulon.repression<-intersect(current.regulon.repression,tetR.DEGs)
   #fold-change matrix with DE target genes (long format: value, comparison label)
   #binding numbers with labels yields a character matrix; values are converted back to numeric below
   temporal.fold.change.matrix<-rbind(cbind(handel.foldchange.matrix[current.regulon.repression,1],rep("WT(+)",length(current.regulon.repression))),
                           cbind(handel.foldchange.matrix[current.regulon.repression,2],rep("TetR(-)",length(current.regulon.repression))),
                           cbind(handel.foldchange.matrix[current.regulon.repression,3],rep("TetR(+)",length(current.regulon.repression))))
    temporal.fold.change.matrix<-as.data.frame(temporal.fold.change.matrix)
    #define column names
    colnames(temporal.fold.change.matrix)<-c("Expression","Strain")
    temporal.fold.change.matrix$Expression<-as.numeric(as.vector(temporal.fold.change.matrix$Expression))
    #ordered factor fixes the left-to-right order of the boxes
    temporal.fold.change.matrix$Strain<-factor(temporal.fold.change.matrix$Strain,levels = c("WT(+)","TetR(-)","TetR(+)"),ordered = TRUE)
    #create boxplot with ggplot (red outline for repressed targets, white fill)
    barplot.arcA <- ggplot(temporal.fold.change.matrix, aes(x=Strain, y=Expression,fill=Strain)) + 
      geom_boxplot(color="red")+ theme(text = element_text(size=24))
    print(barplot.arcA+scale_fill_manual(values=rep("white",3)) + 
    ggtitle(paste(tf.name,"--|",length(current.regulon.repression),"genes",sep=" ")))
    #run random permutation test for each comparison (e.g., WT+TET vs WT-TET, TetR-TET vs WT-TET, etc.)
    print(paste(tf.name,"-WT+TET vs WT-TET:",permutation.function(current.regulon.repression,1),sep=""))
    print(paste(tf.name,"-TetR-TET vs WT-TET:",permutation.function(current.regulon.repression,2),sep=""))
    print(paste(tf.name,"-TetR+TET vs WT-TET:",permutation.function(current.regulon.repression,3),sep=""))
  }
  #if TF is MarA or GadE
  #both MarA and GadE mainly act as activators (i.e., positively regulate >79% of their target genes)
  else
  {
  #activated targets: positive entries in the TF's column of the network
  current.regulon.activation<-names(which(final.trn.network[,tf.locus]>0))
  #the emptiness check is done before the restriction to DEGs below
  if(length(current.regulon.activation)>0)
    {
   #focus on DEGs (either in the TetR baseline or adaptive response)
   current.regulon.activation<-intersect(current.regulon.activation,tetR.DEGs)
   #fold-change matrix with DE target genes (same long format as in the arcA branch)
   temporal.fold.change.matrix<-rbind(cbind(handel.foldchange.matrix[current.regulon.activation,1],rep("WT(+)",length(current.regulon.activation))),
                           cbind(handel.foldchange.matrix[current.regulon.activation,2],rep("TetR(-)",length(current.regulon.activation))),
                           cbind(handel.foldchange.matrix[current.regulon.activation,3],rep("TetR(+)",length(current.regulon.activation))))
  
    temporal.fold.change.matrix<-as.data.frame(temporal.fold.change.matrix)
    colnames(temporal.fold.change.matrix)<-c("Expression","Strain")
    temporal.fold.change.matrix$Expression<-as.numeric(as.vector(temporal.fold.change.matrix$Expression))
    temporal.fold.change.matrix$Strain<-factor(temporal.fold.change.matrix$Strain,levels = c("WT(+)","TetR(-)","TetR(+)"),ordered = TRUE)
    #create boxplot with ggplot (green outline for activated targets, white fill)
    barplot.activation <- ggplot(temporal.fold.change.matrix, aes(x=Strain, y=Expression,fill=Strain)) + 
      geom_boxplot(color="green")+ theme(text = element_text(size=24))
    print(barplot.activation+scale_fill_manual(values=rep("white",3)) +  ggtitle(paste(tf.name,"-->",length(current.regulon.activation),"genes",sep=" "))) 
  }
    #run random permutation test for each comparison (e.g., WT+TET vs WT-TET, TetR-TET vs WT-TET, etc.)
    #NOTE(review): these calls sit outside the 'if' above, so they also run when the TF has no activated targets
     print(paste(tf.name,"-WT+TET vs WT-TET:",permutation.function(current.regulon.activation,1),sep=""))
    print(paste(tf.name,"-TetR-TET vs WT-TET:",permutation.function(current.regulon.activation,2),sep=""))
    print(paste(tf.name,"-TetR+TET vs WT-TET:",permutation.function(current.regulon.activation,3),sep=""))
  }
}
```
2.4 Evaluating overlap between DEGs in TetR adaptive response and arcA KO in anaerobic growth (GEO accession ID: GSE1107)
```{r}
#transcriptional data for arcA KO was sourced from Covert et al. (Nature 2004)
#first column of the file is used as row names (probe identifiers)
arcA.ko.matrix.covert<-read.csv("../Data/Differential_expression_analysis/GEO_Covert2004/arcA_ko_Covert2004.csv",header=T,row.names = 1)
#select replicates for WT and arcA KO in anaerobic condition (4 WT columns followed by 3 arcA KO columns)
arcA.ko.matrix.covert<-arcA.ko.matrix.covert[,c("ana_wt1","ana_wt2","ana_wt3","ana_wt4","ana_arcA1","ana_arcA2","ana_arcA3")]
#rename columns
colnames(arcA.ko.matrix.covert)<-c("anaerobic_wt1","anaerobic_wt2","anaerobic_wt3","anaerobic_wt4","anaerobic_arcA1","anaerobic_arcA2","anaerobic_arcA3")
#read microarray probes to loci map (column used: ORF)
arcA.microarray.probes.loci.map<-read.csv("../Data/Differential_expression_analysis/GEO_Covert2004/arcA_covert2004_probes_gene_map.csv")
#filter out those genes not included in the expression matrix for WT and TetR previously analyzed
arcA.microarray.probes.loci.map<-arcA.microarray.probes.loci.map[arcA.microarray.probes.loci.map$ORF %in% rownames(handel.normalized.matrix),]
#this will be the final matrix with locus tags as rownames
final.arcA.ko.matrix.covert<-c()
#store set of genes present in the resulting expression matrix
genes.included.in.covert.data<-c()
#change probe names to loci
for(g in 1:nrow(arcA.microarray.probes.loci.map))
{
  current.gene<-as.character(arcA.microarray.probes.loci.map$ORF[g])
  genes.included.in.covert.data<-c(genes.included.in.covert.data,current.gene)
  #locate the expression row whose name contains the locus tag (regular-expression match, not exact match)
  #NOTE(review): the row-name assignment after the loop assumes exactly one matching row per gene - confirm
  current.gene.position<-grep(current.gene,rownames(arcA.ko.matrix.covert))
  final.arcA.ko.matrix.covert<-rbind(final.arcA.ko.matrix.covert,arcA.ko.matrix.covert[current.gene.position,])
}
rownames(final.arcA.ko.matrix.covert)<-genes.included.in.covert.data
#log2 transform the microarray data
final.arcA.ko.matrix.covert<-log2(final.arcA.ko.matrix.covert)
#perform differential expression analysis (as previously done above)
#numC = 4 and numE = 3 match the 4 WT and 3 arcA KO columns selected above
arcA.ko.response.covert<-bayesT(final.arcA.ko.matrix.covert,numC = 4,numE = 3,conf = 7,doMulttest = T)
#DEGs due to arcA deletion: BH value below 0.05 and absolute difference between group means above 1 (log2 scale)
arcA.degs.covert<-rownames(arcA.ko.response.covert)[which(arcA.ko.response.covert$BH < 0.05 & abs(arcA.ko.response.covert$meanC - arcA.ko.response.covert$meanE)>1)]
#define overlap between DEGs in arcA KO and TetR response to tetracycline
overlapping.DEGs<-intersect(arcA.degs.covert,tetR.response.degs)
#evaluate overlap significance with hypergeometric test: P(X >= overlap), hence the '-1' with lower.tail = F
#white balls: arcA KO DEGs; black balls: remaining genes of this dataset; draws: TetR response DEGs present in this dataset
overlap.pvalue<-phyper(length(overlapping.DEGs)-1,length(arcA.degs.covert),nrow(arcA.ko.response.covert)-length(arcA.degs.covert),length(intersect(tetR.response.degs,rownames(arcA.ko.response.covert))),lower.tail = F)
print(paste("Overlap p-value=",overlap.pvalue,sep=""))
```
3.1 Fig 3A (initial analysis of barcode sequencing data of genome-wide single gene KO library growth competition assays)
```{r}
#read counts per design for TetR KO library (4 biological replicates per time point)
#this is the output of the InscriptaResolver software
tetR.ko.library.reads <- read.csv("../Data/KO_library_competition/raw_ko_count_data/TetR/a05h/a05h_design_report.csv", header = TRUE)
#define unique samples (i.e., combinations of strain x replicate x treatment x time point)
#sample labels follow the notation: strain (tetR or wt) - K (for KO) followed by the biological replicate {a,b,c,d} -
#'plus' or 'minus' tetracycline - growth cycle {1,2,3} (i.e., T1, T2, T3)
#0-plusTet and 0-minusTet both correspond to T0 and are the same sample (e.g., TetR-Ka-minusTet is the same as TetR-Ka-0-plusTet)
#T0 samples were duplicated when processing the data to facilitate (treatment-specific) downstream analyses
tetR.ko.library.samples <- unique(tetR.ko.library.reads$SampleName)
#define KO designs
tetR.ko.library.designs <- unique(tetR.ko.library.reads$DesignId)
#create read count matrix that includes a column with the name of the gene associated with each KO design
#tetR.ko.library.count.matrix<-matrix(ncol=length(tetR.ko.library.samples)+1,nrow=length(tetR.ko.library.designs),0,
#                             dimnames = list(tetR.ko.library.designs,c("gene",tetR.ko.library.samples)))
#loop to fill out the count matrix
#for(s in tetR.ko.library.samples)
#{
#  print(s)
#  for(d in tetR.ko.library.designs)
#  {
#    current.position<-which(tetR.ko.library.reads$SampleName==s & tetR.ko.library.reads$DesignId==d)
#    current.count<-tetR.ko.library.reads[current.position,"DesignCount"]
#    tetR.ko.library.count.matrix[as.character(d),s]<- current.count
#    tetR.ko.library.count.matrix[as.character(d),"gene"]<-tetR.ko.library.reads[current.position,"Design_TargetName"]
#  }
#}
#tetR.ko.library.count.matrix.with.gene.info<-tetR.ko.library.count.matrix
#load previously compiled (with code shown above) TetR KO library read count matrix
load("../Data/KO_library_competition/processed_count_data/tetR.ko.library.count.matrix.with.gene.info.RData")
#re-organize columns (i.e., sample replicates): seven groups of four replicates each
tetR.sample.column.order <- c(10, 2, 26, 18,
                              12, 4, 28, 20,
                              13, 5, 29, 21,
                              14, 6, 30, 22,
                              15, 7, 31, 23,
                              16, 8, 32, 24,
                              17, 9, 33, 25)
tetR.ko.read.counts <- tetR.ko.library.count.matrix.with.gene.info[, tetR.sample.column.order]
#change column names to a simpler notation: Cycle-(+/-)TET-replicate{a,b,c,d}
#Ci indicates cycle # i in the competition assay. This means that C0=t0, C1=t1, etc.
tetR.sample.groups <- c("C0", "C1-TET-", "C2-TET-", "C3-TET-", "C1+TET-", "C2+TET-", "C3+TET-")
colnames(tetR.ko.read.counts) <- paste(rep(tetR.sample.groups, each = 4), letters[1:4], sep = "")
#the selection above leaves out the column with gene names; convert every remaining column to numeric
#and bind the columns back together into a purely numeric matrix
tetR.ko.read.counts.numeric <- do.call(cbind,
                                       lapply(seq_len(ncol(tetR.ko.read.counts)),
                                              function(column) as.numeric(tetR.ko.read.counts[, column])))
colnames(tetR.ko.read.counts.numeric) <- colnames(tetR.ko.read.counts)
rownames(tetR.ko.read.counts.numeric) <- rownames(tetR.ko.read.counts)
#same preparation for the WT KO library
#read counts per design for WT KO libraries (4 biological replicates per time point)
#this is the output of the InscriptaResolver software
wt.ko.library.reads<-read.csv("../Data/KO_library_competition/raw_ko_count_data/WT/a05h/a05h_design_report.csv",header=TRUE)
#list the unique samples (with same notation used for the TetR KO library)
wt.ko.library.samples<-unique(wt.ko.library.reads$SampleName)
#list the unique KO designs
wt.ko.library.designs<-unique(wt.ko.library.reads$DesignId)
#the commented-out code below shows how the read count matrix was compiled: one row per KO design, one column per sample,
#plus a first column ("gene") with the name of the gene associated with each KO design
#wt.ko.library.count.matrix<-matrix(ncol=length(wt.ko.library.samples)+1,nrow=length(wt.ko.library.designs),0,
#                           dimnames = list(as.character(wt.ko.library.designs),c("gene",wt.ko.library.samples)))
#loop to fill out the count matrix
#for(s in wt.ko.library.samples)
#{
#  print(s)
#  for(d in wt.ko.library.designs)
#  {
#  current.position<-which(wt.ko.library.reads$SampleName==s & wt.ko.library.reads$DesignId==d)
#  current.count<-wt.ko.library.reads[current.position,"DesignCount"]
#  wt.ko.library.count.matrix[as.character(d),s]<- current.count
#  wt.ko.library.count.matrix[as.character(d),"gene"]<-wt.ko.library.reads[current.position,"Design_TargetName"]
#  }
#}
#wt.ko.library.count.matrix.with.gene.info<-wt.ko.library.count.matrix
#load previously compiled WT KO library read count matrix
load("../Data/KO_library_competition/processed_count_data/wt.ko.library.count.matrix.with.gene.info.RData")
#re-organize columns (ie, sample replicates): each row below lists the four replicate columns (a,b,c,d) of one sample group
#column 1 ("gene" in the compilation code above) is not among the selected columns
wt.ko.column.order<-c(10,2,26,18,
                      12,4,28,20,
                      13,5,29,21,
                      14,6,30,22,
                      15,7,31,23,
                      16,8,32,24,
                      17,9,33,25)
wt.ko.read.counts<-wt.ko.library.count.matrix.with.gene.info[,wt.ko.column.order]
#re-use the simplified column names of the TetR matrix: Cycle-(+/-)TET-replicate{a,b,c,d}
colnames(wt.ko.read.counts)<-colnames(tetR.ko.read.counts)
#build a numeric copy of the re-ordered matrix, converting one column at a time
wt.ko.read.counts.numeric<-vapply(seq_len(ncol(wt.ko.read.counts)),
                                  function(column){as.numeric(wt.ko.read.counts[,column])},
                                  numeric(nrow(wt.ko.read.counts)))
#carry over the design IDs (rows) and the simplified sample names (columns)
dimnames(wt.ko.read.counts.numeric)<-list(rownames(wt.ko.read.counts),colnames(wt.ko.read.counts))
#define depleted (ie, undetected) KO designs at each time point
#a KO design was considered not detected at a time point of interest when all four biological replicates had less than 10 reads for the corresponding KO design
#labels of the seven sample groups, in the column order of the numeric count matrices (four replicate columns per group)
time.point.labels<-c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)")
#the designs '500955653' and '500955654' are excluded from this analysis because they were used as internal controls and they are not associated with any gene
control.ko.designs<-c("500955653","500955654")
#helper: list the depleted KO designs of every sample group
#  read.count.matrix: numeric matrix of read counts; rows named by KO design, columns grouped in consecutive blocks of replicates
#  ko.designs: KO designs to evaluate (converted to character to index the rows)
#  group.labels: names of the sample groups, in column-block order
#  replicates.per.group: number of consecutive columns per sample group
#  min.reads: a design is depleted in a group when every replicate has fewer reads than this value
#returns a named list (one element per group) with the depleted designs; a group with no depleted design gets an empty vector
list.depleted.designs.per.time.point<-function(read.count.matrix,ko.designs,group.labels,replicates.per.group=4,min.reads=10)
{
  depleted.by.group<-setNames(vector("list",length(group.labels)),group.labels)
  for(group in seq_along(group.labels))
  {
    replicate.columns<-(replicates.per.group*(group-1)+1):(replicates.per.group*group)
    group.counts<-read.count.matrix[as.character(ko.designs),replicate.columns,drop=FALSE]
    #missing counts would make the detection call undefined, so stop instead of guessing
    stopifnot(!anyNA(group.counts))
    #"maximum over replicates below min.reads" is the same as "no replicate reaches min.reads"
    is.depleted<-rowSums(group.counts>=min.reads)==0
    #assign with [<- and list(): a [[<- assignment of NULL would delete the slot and shift the later time points
    depleted.by.group[group]<-list(ko.designs[unname(is.depleted)])
  }
  depleted.by.group
}
#KO designs labeled as depleted in the TetR library
tetR.depleted.designs.time.series<-list.depleted.designs.per.time.point(tetR.ko.read.counts.numeric,
                                                                        setdiff(tetR.ko.library.designs,control.ko.designs),
                                                                        time.point.labels)
#number of detected KO designs per sample/time point
#NOTE(review): 8271 is hard-coded as the total number of KO designs (presumably excluding the two controls) - confirm
tetR.non.depleted.designs.vector<-8271-lengths(tetR.depleted.designs.time.series)
#same analysis for WT
#KO designs labeled as depleted in the WT library
wt.depleted.designs.time.series<-list.depleted.designs.per.time.point(wt.ko.read.counts.numeric,
                                                                      setdiff(wt.ko.library.designs,control.ko.designs),
                                                                      time.point.labels)
#number of detected KO designs per sample/time point
wt.non.depleted.designs.vector<-8271-lengths(wt.depleted.designs.time.series)
#create left panel of Fig. 3A: number of detected KO designs per growth cycle
par(mfrow=c(1,2))
cycles<-0:3
#positions of the antibiotic-free series (T0,T1(-),T2(-),T3(-)) and of the tetracycline-treated series (T0,T1(+),T2(+),T3(+))
untreated.series<-1:4
treated.series<-c(1,5:7)
#TetR(-TET) opens the plot (dashed line)
plot(x=cycles,y=tetR.non.depleted.designs.vector[untreated.series],
     type="o",lty=2,pch=20,col="red",
     xlab="Cycle",ylab="# mutant designs",ylim=c(5000,8050),
     cex=1.5,cex.lab=1.5,cex.axis=1.5)
#TetR(+TET)
points(x=cycles,y=tetR.non.depleted.designs.vector[treated.series],type="o",pch=20,col="red4")
#WT(-TET) (dashed line)
points(x=cycles,y=wt.non.depleted.designs.vector[untreated.series],type="o",lty=2,pch=20,col="blue")
#WT(+TET)
points(x=cycles,y=wt.non.depleted.designs.vector[treated.series],type="o",pch=20,col="blue4")
legend(x=0,y=6800,
       legend=c("TetR(+)","TetR(-)","WT(+)","WT(-)"),
       col=c("red4","red","blue4","blue"),
       lty=c(1,2,1,2),pch=rep(20,4),
       cex=1.1,box.lwd=0)
#perform similar analysis to identify genes whose KOs were undetectable
#a gene was considered undetected (ie,depleted) if its two KO designs were labeled as depleted in the previous analysis
#labels of the seven sample groups, in the order used by the depleted-design lists
time.point.labels<-c("T0","T1(-)","T2(-)","T3(-)","T1(+)","T2(+)","T3(+)")
#helper: list the depleted genes of every sample group
#  depleted.designs.by.group: list with the depleted KO designs of each sample group (indexed by position)
#  count.matrix.with.gene.info: matrix with rows named by KO design and a "gene" column
#  group.labels: names given to the elements of the returned list
#  designs.per.gene: number of depleted designs a gene needs to be called depleted
#returns a named list (one element per group) with the names of the depleted genes; a group with no depleted gene gets character(0)
list.depleted.genes.per.time.point<-function(depleted.designs.by.group,count.matrix.with.gene.info,group.labels,designs.per.gene=2)
{
  design.ids<-rownames(count.matrix.with.gene.info)
  depleted.genes.by.group<-setNames(vector("list",length(group.labels)),group.labels)
  for(group in seq_along(group.labels))
  {
    #genes associated with the KO designs labeled as depleted in the current group
    genes.of.depleted.designs<-count.matrix.with.gene.info[design.ids%in%depleted.designs.by.group[[group]],"gene"]
    #count number of depleted designs associated with each gene
    depleted.designs.per.gene<-table(genes.of.depleted.designs)
    depleted.gene.names<-as.character(names(depleted.designs.per.gene)[depleted.designs.per.gene==designs.per.gene])
    #assign with [<- and list(): a [[<- assignment of NULL would delete the slot and shift the later time points
    depleted.genes.by.group[group]<-list(depleted.gene.names)
  }
  depleted.genes.by.group
}
#names of genes labeled as depleted in the TetR library
tetR.depleted.genes<-list.depleted.genes.per.time.point(tetR.depleted.designs.time.series,
                                                        tetR.ko.library.count.matrix.with.gene.info,
                                                        time.point.labels)
#number of detected genes per sample/time point
#NOTE(review): 4153 is hard-coded as the total number of genes in the library - confirm
tetR.non.depleted.genes.count.vector<-4153-unname(lengths(tetR.depleted.genes))
#names of genes labeled as depleted in the WT library
wt.depleted.genes<-list.depleted.genes.per.time.point(wt.depleted.designs.time.series,
                                                      wt.ko.library.count.matrix.with.gene.info,
                                                      time.point.labels)
#number of detected genes per sample/time point
wt.non.depleted.genes.count.vector<-4153-unname(lengths(wt.depleted.genes))
#create right panel of Fig. 3A: number of detected genes per growth cycle
cycles<-0:3
#positions of the antibiotic-free series (T0 and the three -TET cycles) and of the tetracycline-treated series (T0 and the three +TET cycles)
untreated.series<-1:4
treated.series<-c(1,5:7)
#TetR(-TET) opens the plot (dashed line)
plot(x=cycles,y=tetR.non.depleted.genes.count.vector[untreated.series],
     type="o",lty=2,pch=20,col="red",
     xlab="Cycle",ylab="# detected genes",ylim=c(3200,4120),
     cex=1.5,cex.lab=1.5,cex.axis=1.5)
#TetR(+TET)
points(x=cycles,y=tetR.non.depleted.genes.count.vector[treated.series],type="o",pch=20,col="red4")
#WT(-TET) (dashed line)
points(x=cycles,y=wt.non.depleted.genes.count.vector[untreated.series],type="o",lty=2,pch=20,col="blue")
#WT(+TET)
points(x=cycles,y=wt.non.depleted.genes.count.vector[treated.series],type="o",pch=20,col="blue4")
legend(x=0,y=3800,
       legend=c("TetR(+)","TetR(-)","WT(+)","WT(-)"),
       col=c("red4","red","blue4","blue"),
       lty=c(1,2,1,2),pch=rep(20,4),
       cex=1.1,box.lwd=0)
#print summary table: one row per sample/time point, one column per library and feature type
output.summary<-cbind("TetR-detected KO designs"=tetR.non.depleted.designs.vector,
                      "TetR-detected genes"=tetR.non.depleted.genes.count.vector,
                      "WT-detected KO designs"=wt.non.depleted.designs.vector,
                      "WT-detected genes"=wt.non.depleted.genes.count.vector)
print(output.summary)
```
3.2 Create Fig. 3B
```{r}
#the commented-out code below writes a CSV file with gene name and locus tag of all genes identified as depleted in the last time point (T3) of TetR+TET competition experiment due to tetracycline (this set does not include genes not detected at T0 or T3-TET)
#this file is the input for DAVID (https://david.ncifcrf.gov/home.jsp) functional annotation clustering analysis
#genes.depleted.in.tetR.due.to.tet<-setdiff(tetR.depleted.genes[["T3(+)"]],union(tetR.depleted.genes[["T0"]],tetR.depleted.genes[["T3(-)"]]))
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_depleted_genes_t3_due_to_tetracycline.csv",cbind(genes.depleted.in.tetR.due.to.tet,translate.gene.name.to.locus(genes.depleted.in.tetR.due.to.tet)))
#create barplot for Fig 3B (using DAVID output)
#number of genes and label of each functional category, in matching order
fig3b.gene.counts<-c(116,74,28,27,24,18,16,11,11)
fig3b.category.labels<-c("Nucleotide/ATP binding",
                         "Ribosome/Translation/rRNA-binding",
                         "LPS biosynthesis",
                         "Cell division",
                         "Cell wall org./cell shape",
                         "Biosynthesis of nucleotide sugars",
                         "DNA replication",
                         "Ribosome biogenesis",
                         "Ubiquinone biosynthesis")
par(mfrow=c(1,2))
#open an empty first panel so that the barplot is drawn in the second one
plot.new()
barplot(fig3b.gene.counts,
        names.arg=fig3b.category.labels,
        horiz=TRUE,las=2,
        col=rainbow(length(fig3b.gene.counts)),
        xlab="# genes",xlim=c(0,120),
        cex.lab=1.25,cex.axis=1.25,cex.names=1,
        main="Fig. 3B")
```
3.3 Load and process ALDEx2 output
```{r}
#the script below runs ALDEx2 and generates relevant outputs (eg, T1 vs T0 with and without tetracycline in the TetR and WT libraries)
#the source() call is commented out; the saved output is loaded from file below instead
#source("aldex2_analysis.R")
#load ALDEx2 output
load("../Data/KO_library_competition/processed_count_data/ALDEx2_output.RData")
#notation of the ALDEx2 output objects: <strain>.ko.iqlr.c0.c<i>.<treatment>, where
#  <strain> is tetR or wt
#  iqlr stands for inter-quartile log ratio
#  c0 is T0 and c<i> (with i equal to 1 or 2) is T1 or T2
#  <treatment> is plus or minus (tetracycline treatment)
#example: tetR.ko.iqlr.c0.c1.plus is the TetR+TET (T1) vs TetR (T0) comparison
#function to define genes whose deletion impact fitness
#a gene is reported when every one of its KO designs present in the ALDEx2 output is part of design.set,
#that is, when all of its non-depleted designs were over- or under-represented in the ALDEx2 test
#  design.set: names of the differentially abundant KO designs (defined using ALDEx2)
#  aldex.output.matrix: ALDEx2 output; its row names are the KO designs that entered the test
#  read.count.matrix: matrix with rows named by KO design and a "gene" column
#returns a character vector of gene names, or NULL when no gene qualifies
define.high.confindence.genes<-function(design.set,aldex.output.matrix,read.count.matrix)
{
  design.ids<-rownames(read.count.matrix)
  #gene associated with each design of design.set
  flagged.gene.per.design<-read.count.matrix[design.ids%in%design.set,"gene"]
  #gene associated with each design included in the ALDEx2 input (row names of aldex.output.matrix)
  tested.gene.per.design<-read.count.matrix[design.ids%in%rownames(aldex.output.matrix),"gene"]
  candidate.genes<-unique(flagged.gene.per.design)
  #keep a gene only if the number of its flagged designs equals the number of its tested designs
  all.designs.flagged<-vapply(candidate.genes,function(gene.name)
  {
    sum(flagged.gene.per.design==gene.name,na.rm=TRUE)==sum(tested.gene.per.design==gene.name,na.rm=TRUE)
  },logical(1),USE.NAMES=FALSE)
  selected.genes<-candidate.genes[all.designs.flagged]
  #an empty selection is reported as NULL
  if(length(selected.genes)==0)
  {
    return(NULL)
  }
  selected.genes
}
#############################################################
#adjusted p-value (qvalue) and ALDEx2-estimated effect thresholds used in the analysis
qval.threshold<-0.1
effect.threshold<-2
#helper: names of the KO designs of an ALDEx2 output table whose we.eBH and wi.eBH values are both below qval.threshold
#and whose effect is beyond effect.threshold in the requested direction
#  direction=-1 selects under-represented (deleterious) designs: effect < -effect.threshold
#  direction=+1 selects over-represented (beneficial) designs: effect > effect.threshold
select.differentially.abundant.designs<-function(aldex.table,direction)
{
  significant<-aldex.table$we.eBH<qval.threshold & aldex.table$wi.eBH<qval.threshold
  strong.effect<-direction*aldex.table$effect>effect.threshold
  rownames(aldex.table)[which(significant & strong.effect)]
}
#process ALDEx2 output for TetR+TET (T1) vs TetR (T0) (tetR.ko.iqlr.c0.c1.plus)
#under-represented (deleterious) KO designs and the genes whose deletions were deleterious
tetR.deleterious.feautures.c0.c1.plus<-select.differentially.abundant.designs(tetR.ko.iqlr.c0.c1.plus,direction=-1)
tetR.deleterious.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.deleterious.feautures.c0.c1.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c1.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info)
#over-represented (beneficial) KO designs and the genes whose deletions were beneficial
tetR.beneficial.feautures.c0.c1.plus<-select.differentially.abundant.designs(tetR.ko.iqlr.c0.c1.plus,direction=1)
tetR.beneficial.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.beneficial.feautures.c0.c1.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c1.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info)
#remove genes with KO designs labeled as beneficial AND deleterious
tetR.deleterious.deletions.c0.c1.plus.final<-setdiff(tetR.deleterious.deletions.c0.c1.plus.high.confidence,
                                                     tetR.beneficial.deletions.c0.c1.plus.high.confidence)
tetR.beneficial.deletions.c0.c1.plus.final<-setdiff(tetR.beneficial.deletions.c0.c1.plus.high.confidence,
                                                    tetR.deleterious.deletions.c0.c1.plus.high.confidence)
#############################################################
#process ALDEx2 output for TetR+TET (T2) vs TetR (T0) (tetR.ko.iqlr.c0.c2.plus)
#under-represented (deleterious) KO designs and the genes whose deletions were deleterious
tetR.deleterious.feautures.c0.c2.plus<-select.differentially.abundant.designs(tetR.ko.iqlr.c0.c2.plus,direction=-1)
tetR.deleterious.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.deleterious.feautures.c0.c2.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c2.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info)
#over-represented (beneficial) KO designs and the genes whose deletions were beneficial
tetR.beneficial.feautures.c0.c2.plus<-select.differentially.abundant.designs(tetR.ko.iqlr.c0.c2.plus,direction=1)
tetR.beneficial.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=tetR.beneficial.feautures.c0.c2.plus,
  aldex.output.matrix=tetR.ko.iqlr.c0.c2.plus,
  read.count.matrix=tetR.ko.library.count.matrix.with.gene.info)
#remove genes with mutant designs labeled as beneficial AND deleterious
tetR.deleterious.deletions.c0.c2.plus.final<-setdiff(tetR.deleterious.deletions.c0.c2.plus.high.confidence,
                                                     tetR.beneficial.deletions.c0.c2.plus.high.confidence)
tetR.beneficial.deletions.c0.c2.plus.final<-setdiff(tetR.beneficial.deletions.c0.c2.plus.high.confidence,
                                                    tetR.deleterious.deletions.c0.c2.plus.high.confidence)
#consolidate results for the TetR library
#gene deletions with opposite effects at different time points were not considered
tetR.deleterious.at.any.time.point<-union(tetR.deleterious.deletions.c0.c1.plus.final,tetR.deleterious.deletions.c0.c2.plus.final)
tetR.beneficial.at.any.time.point<-union(tetR.beneficial.deletions.c0.c1.plus.final,tetR.beneficial.deletions.c0.c2.plus.final)
tetR.deleterious.aldex.compilation<-setdiff(tetR.deleterious.at.any.time.point,tetR.beneficial.at.any.time.point)
tetR.beneficial.aldex.compilation<-setdiff(tetR.beneficial.at.any.time.point,tetR.deleterious.at.any.time.point)
print(paste("TetR had",length(tetR.deleterious.aldex.compilation),"genes whose deletions were deleterious"))
print(paste("TetR had",length(tetR.beneficial.aldex.compilation),"genes whose deletions were beneficial"))
#zero KO designs were identified as differentially abundant at T1(-TET) or T2(-TET) vs T0
#############################################################
#helper: names of the KO designs of an ALDEx2 output table whose we.eBH and wi.eBH values are both below qval.threshold
#and whose effect is beyond effect.threshold in the requested direction
#  direction=-1 selects under-represented (deleterious) designs: effect < -effect.threshold
#  direction=+1 selects over-represented (beneficial) designs: effect > effect.threshold
select.differentially.abundant.designs<-function(aldex.table,direction)
{
  significant<-aldex.table$we.eBH<qval.threshold & aldex.table$wi.eBH<qval.threshold
  strong.effect<-direction*aldex.table$effect>effect.threshold
  rownames(aldex.table)[which(significant & strong.effect)]
}
#process ALDEx2 output for WT+TET (T1) vs WT (T0) (wt.ko.iqlr.c0.c1.plus)
#under-represented (deleterious) KO designs and the genes whose deletions were deleterious
wt.deleterious.feautures.c0.c1.plus<-select.differentially.abundant.designs(wt.ko.iqlr.c0.c1.plus,direction=-1)
wt.deleterious.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.deleterious.feautures.c0.c1.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c1.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info)
#over-represented (beneficial) KO designs and the genes whose deletions were beneficial
wt.beneficial.feautures.c0.c1.plus<-select.differentially.abundant.designs(wt.ko.iqlr.c0.c1.plus,direction=1)
wt.beneficial.deletions.c0.c1.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.beneficial.feautures.c0.c1.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c1.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info)
#remove genes with mutant designs identified as both beneficial AND deleterious
wt.deleterious.deletions.c0.c1.plus.final<-setdiff(wt.deleterious.deletions.c0.c1.plus.high.confidence,
                                                   wt.beneficial.deletions.c0.c1.plus.high.confidence)
wt.beneficial.deletions.c0.c1.plus.final<-setdiff(wt.beneficial.deletions.c0.c1.plus.high.confidence,
                                                  wt.deleterious.deletions.c0.c1.plus.high.confidence)
#############################################################
#process ALDEx2 output for WT+TET (T2) vs WT (T0) (wt.ko.iqlr.c0.c2.plus)
#under-represented (deleterious) KO designs and the genes whose deletions were deleterious
wt.deleterious.feautures.c0.c2.plus<-select.differentially.abundant.designs(wt.ko.iqlr.c0.c2.plus,direction=-1)
wt.deleterious.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.deleterious.feautures.c0.c2.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c2.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info)
#over-represented (beneficial) KO designs and the genes whose deletions were beneficial
wt.beneficial.feautures.c0.c2.plus<-select.differentially.abundant.designs(wt.ko.iqlr.c0.c2.plus,direction=1)
wt.beneficial.deletions.c0.c2.plus.high.confidence<-define.high.confindence.genes(
  design.set=wt.beneficial.feautures.c0.c2.plus,
  aldex.output.matrix=wt.ko.iqlr.c0.c2.plus,
  read.count.matrix=wt.ko.library.count.matrix.with.gene.info)
#remove genes with mutant designs identified as both beneficial AND deleterious
wt.deleterious.deletions.c0.c2.plus.final<-setdiff(wt.deleterious.deletions.c0.c2.plus.high.confidence,
                                                   wt.beneficial.deletions.c0.c2.plus.high.confidence)
wt.beneficial.deletions.c0.c2.plus.final<-setdiff(wt.beneficial.deletions.c0.c2.plus.high.confidence,
                                                  wt.deleterious.deletions.c0.c2.plus.high.confidence)
#consolidate results for the WT library
#as in the TetR consolidation, gene deletions with opposite effects at different time points are not considered
#NOTE(review): the previous version took the plain union here, so a gene deleterious at one time point and beneficial
#at the other ended up in both WT sets; the result is unchanged when no such gene exists
wt.deleterious.at.any.time.point<-union(wt.deleterious.deletions.c0.c1.plus.final,wt.deleterious.deletions.c0.c2.plus.final)
wt.beneficial.at.any.time.point<-union(wt.beneficial.deletions.c0.c1.plus.final,wt.beneficial.deletions.c0.c2.plus.final)
wt.deleterious.aldex.compilation<-setdiff(wt.deleterious.at.any.time.point,wt.beneficial.at.any.time.point)
wt.beneficial.aldex.compilation<-setdiff(wt.beneficial.at.any.time.point,wt.deleterious.at.any.time.point)
print(paste("WT had",length(wt.deleterious.aldex.compilation),"genes whose deletions were deleterious",sep=" "))
print(paste("WT had",length(wt.beneficial.aldex.compilation),"genes whose deletions were beneficial",sep=" "))
```
3.4 Fig. 3C
```{r}
#plot profiles of changes in abundance (with respect to T0) for selected genes (one KO design per gene)
selected.genes<-c("atpA","frdC","acrZ","mdtA","uvrA","arcA","phoP","rpoS","cytR")
#corresponding KO designs (in the same order of the selected genes)
selected.designs<-c("6417560","6418918","6423156","6417461","6422556","6405971","6421304","6414543","6417292")
par(mfrow=c(3,3))
#helper: abundance-difference profile of one KO design: 0 at T0, followed by the "diff.btw" values
#of that design in the C1-vs-C0 and C2-vs-C0 ALDEx2 tables
delta.abundance.profile<-function(c1.table,c2.table,design)
{
  c(0,c1.table[design,"diff.btw"],c2.table[design,"diff.btw"])
}
cycles<-0:2
#one subpanel per selected gene
for(panel in seq_along(selected.genes))
{
  current.gene<-selected.genes[panel]
  current.design<-selected.designs[panel]
  #antibiotic-free TetR library
  tetR.untreated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.minus,tetR.ko.iqlr.c0.c2.minus,current.design)
  #tetracycline-treated TetR library
  tetR.treated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.plus,tetR.ko.iqlr.c0.c2.plus,current.design)
  #antibiotic-free WT library
  wt.untreated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.minus,wt.ko.iqlr.c0.c2.minus,current.design)
  #tetracycline-treated WT library
  wt.treated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.plus,wt.ko.iqlr.c0.c2.plus,current.design)
  #common y-axis range for the four profiles (missing values are ignored)
  profile.range<-range(c(tetR.untreated.vector,tetR.treated.vector,wt.untreated.vector,wt.treated.vector),na.rm=TRUE)
  #the antibiotic-free TetR profile opens the panel; untreated profiles are dashed
  plot(x=cycles,y=tetR.untreated.vector,
       type="o",lty=2,col="red",ylim=profile.range,
       main=paste(current.gene,current.design,sep="-"),
       xlab="Cycle",ylab=expression(paste(Delta,"abundance (vs t0)")),
       cex=1.1,cex.lab=1.1,cex.axis=1.1)
  points(x=cycles,y=tetR.treated.vector,type="o",col="red4")
  points(x=cycles,y=wt.treated.vector,type="o",col="blue4")
  points(x=cycles,y=wt.untreated.vector,type="o",lty=2,col="blue")
}
```
3.5 Fig. S4
```{r}
#create Fig. S4A
#compare genes whose deletion affect fitness on TetR and/or WT backgrounds
#genes associated with deleterious KO deletions: shared, TetR-only and WT-only
conserved.deleterious.genes<-intersect(wt.deleterious.aldex.compilation,tetR.deleterious.aldex.compilation)
tetR.unique.deleterious.genes<-setdiff(tetR.deleterious.aldex.compilation,wt.deleterious.aldex.compilation)
wt.unique.deleterious.genes<-setdiff(wt.deleterious.aldex.compilation,tetR.deleterious.aldex.compilation)
#genes associated with beneficial KO deletions: shared, TetR-only and WT-only
conserved.beneficial.genes<-intersect(wt.beneficial.aldex.compilation,tetR.beneficial.aldex.compilation)
tetR.unique.beneficial.genes<-setdiff(tetR.beneficial.aldex.compilation,wt.beneficial.aldex.compilation)
wt.unique.beneficial.genes<-setdiff(wt.beneficial.aldex.compilation,tetR.beneficial.aldex.compilation)
#compile name of genes that impact fitness in the WT and/or TetR strains during tetracycline treatment
differentially.abundant.gene.compilation<-unique(c(conserved.beneficial.genes,
                                                   wt.unique.beneficial.genes,
                                                   tetR.unique.beneficial.genes,
                                                   conserved.deleterious.genes,
                                                   wt.unique.deleterious.genes,
                                                   tetR.unique.deleterious.genes))
#create matrix for heatmap: one row per gene, one column per background, initialized to 0
#the notation is: +1 = beneficial deletion, -1 = deleterious deletion, 0 = neutral
input.matrix.for.heatmap<-matrix(0,
                                 nrow=length(differentially.abundant.gene.compilation),
                                 ncol=2,
                                 dimnames=list(differentially.abundant.gene.compilation,c("WT","TetR")))
#fill out input matrix; within each background the deleterious label is written last, so it takes precedence
#deletion is beneficial in the WT background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%wt.beneficial.aldex.compilation,"WT"]<-1
#deletion is deleterious in the WT background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%wt.deleterious.aldex.compilation,"WT"]<- -1
#deletion is beneficial in the TetR background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%tetR.beneficial.aldex.compilation,"TetR"]<-1
#deletion is deleterious in the TetR background
input.matrix.for.heatmap[differentially.abundant.gene.compilation%in%tetR.deleterious.aldex.compilation,"TetR"]<- -1
#three color bins: [-1,-0.0001], (-0.0001,0.0001] and (0.0001,1]
figS4A.breaks<-c(-1,-0.0001,0.0001,1)
figS4A.colors<-colorRampPalette(rev(brewer.pal(3, "PiYG")))(3)[3:1]
pheatmap(input.matrix.for.heatmap,
         scale="none",
         color=figS4A.colors,
         breaks=figS4A.breaks,
         legend=TRUE,
         legend_breaks=figS4A.breaks,
         legend_labels=c("-1","0","0","+1"),
         cluster_rows=FALSE,
         cluster_cols=FALSE,
         show_rownames=FALSE,
         fontsize=5,
         angle_col=90,
         main="Fig. S4A")
#create Fig. S4B
#the commented-out code below writes CSV files with TetR-specific deleterious and beneficial gene deletions
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_specific_beneficial_genes_082422.csv",cbind(translate.gene.name.to.locus(tetR.unique.beneficial.genes),tetR.unique.beneficial.genes))
#write.csv(file="../Data/KO_library_competition/processed_count_data/tetR_specific_deleterious_genes_082422.csv",cbind(translate.gene.name.to.locus(tetR.unique.deleterious.genes),tetR.unique.deleterious.genes))
#barplot based on output from DAVID functional enrichment analysis
#number of genes, label and bar color of each functional category, in matching order
figS4B.gene.counts<-c(40,29,13,6,9)
figS4B.category.labels<-c("Fe/other ion transport",
                          "Pilus-cell adhesion",
                          "TCA",
                          "Enterobactin biosynthesis",
                          "Ubiquinone/other terpenoid-quinone biosynthesis")
figS4B.colors<-c("#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854")
par(mfrow=c(1,2))
#open an empty first panel so that the barplot is drawn in the second one
plot.new()
barplot(figS4B.gene.counts,
        names.arg=figS4B.category.labels,
        horiz=TRUE,las=2,
        col=figS4B.colors,
        xlab="Number of genes",xlim=c(0,42),
        cex.lab=1.1,cex.axis=1.1,cex.names=1.1,
        main="Fig. S4B")
#create Fig. S4C
#heatmap for TFs whose deletion affect fitness of TetR and/or WT
#define names of E. coli TFs
ecoli.tfs<-convert.locus.to.gene.name(tf.names)
#update TF names to include the gene name synonym used by InscriptaResolver
#lookup table: the element name is the current TF name, the value is the synonym that replaces it
tf.name.synonyms<-c(rpiR="alsR",ycgE="bluR",ycfQ="comR",fruR="cra",
                    yfhA="glrR",yjiE="hypT",matA="ecpR",chpR="mazE",
                    dgsA="mlc",ygiT="mqsA",yfeT="murR",ycjZ="pgrR",
                    ybjK="rcdA",ydcN="sutR",yehT="btsR",yqjI="nfeR")
has.synonym<-ecoli.tfs%in%names(tf.name.synonyms)
ecoli.tfs[has.synonym]<-unname(tf.name.synonyms[ecoli.tfs[has.synonym]])
#identify TFs whose deletion affect fitness of TetR and/or WT
differentially.abundant.tfs<-intersect(ecoli.tfs,differentially.abundant.gene.compilation)
#identify KO designs associated with differentially abundant TFs
tf.ko.designs<-rownames(tetR.ko.library.count.matrix.with.gene.info)[tetR.ko.library.count.matrix.with.gene.info[,"gene"]%in%differentially.abundant.tfs]
#TF associated with each of those KO designs (same order as tf.ko.designs)
order.of.tfs.in.matrix<-unname(tetR.ko.library.count.matrix.with.gene.info[tf.ko.designs,"gene"])
#create matrix with changes in abundance (delta) with respect to T0 for selected TFs during tetracycline treatment
#one row per KO design; columns are WT T1, WT T2, TetR T1 and TetR T2
#a design that is absent from an ALDEx2 table gets NA in the corresponding column
tf.deletion.delta.abundance.matrix<-cbind(wt.ko.iqlr.c0.c1.plus[tf.ko.designs,"diff.btw"],
                                          wt.ko.iqlr.c0.c2.plus[tf.ko.designs,"diff.btw"],
                                          tetR.ko.iqlr.c0.c1.plus[tf.ko.designs,"diff.btw"],
                                          tetR.ko.iqlr.c0.c2.plus[tf.ko.designs,"diff.btw"])
#rename matrix using TF names
rownames(tf.deletion.delta.abundance.matrix)<-order.of.tfs.in.matrix
#re-order matrix rows by TF names (drop=FALSE keeps a matrix even when there is a single row)
tf.deletion.delta.abundance.matrix<-tf.deletion.delta.abundance.matrix[order(rownames(tf.deletion.delta.abundance.matrix)),,drop=FALSE]
#compute mean difference in abundance per TF
#the column means are taken over however many KO designs a TF has (one, two or more) and ignore NAs;
#a column in which every design of the TF is NA yields NaN, which is treated as missing in the next step
tf.labels<-unique(rownames(tf.deletion.delta.abundance.matrix))
average.tf.deletion.delta.abundance.matrix<-t(vapply(tf.labels,function(tf)
{
  colMeans(tf.deletion.delta.abundance.matrix[rownames(tf.deletion.delta.abundance.matrix)==tf,,drop=FALSE],na.rm=TRUE)
},numeric(ncol(tf.deletion.delta.abundance.matrix))))
rownames(average.tf.deletion.delta.abundance.matrix)<-tf.labels
#replace NAs (that occur for those time points in which a gene KOs were not detected) with a "20" value
average.tf.deletion.delta.abundance.matrix[is.na(average.tf.deletion.delta.abundance.matrix)]<-20
#add column names
colnames(average.tf.deletion.delta.abundance.matrix)<-c("WT.t1","WT.t2","TetR.t1","TetR.t2")
#define heatmap breaks; the last bin (10,20] is drawn in black and receives the "20" placeholder of undetected KOs
heatmap.breaks<-c(-10,-4,-2,-1,-0.5,0,0.5,1,2,4,10,20)
#generate heatmap (ten PiYG colors for the first ten bins plus black for the last one)
pheatmap(average.tf.deletion.delta.abundance.matrix,
         scale="none",
         color=c(colorRampPalette(rev(brewer.pal(10, "PiYG")))(10)[10:1],"black"),
         breaks=heatmap.breaks,
         legend=TRUE,
         legend_breaks=heatmap.breaks,
         cluster_rows=FALSE,
         cluster_cols=FALSE,
         fontsize=5,
         angle_col=90,
         main="Fig. S4C")
```
3.6 Fig. S5
```{r}
#plot profiles of changes in abundance (with respect to T0) for selected genes (one KO design per gene)
selected.genes<-c("menB","frdB","hybB","mdh")
#corresponding KO designs (in the same order of the selected genes)
selected.designs<-c("6419317","6416702","6418292","6419091")
par(mfrow=c(2,2))
#helper: abundance-difference profile of one KO design: 0 at T0, followed by the "diff.btw" values
#of that design in the C1-vs-C0 and C2-vs-C0 ALDEx2 tables
delta.abundance.profile<-function(c1.table,c2.table,design)
{
  c(0,c1.table[design,"diff.btw"],c2.table[design,"diff.btw"])
}
cycles<-0:2
#one subpanel per selected gene
for(panel in seq_along(selected.genes))
{
  current.gene<-selected.genes[panel]
  current.design<-selected.designs[panel]
  #antibiotic-free TetR library
  tetR.untreated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.minus,tetR.ko.iqlr.c0.c2.minus,current.design)
  #tetracycline-treated TetR library
  tetR.tetracycline.treated.vector<-delta.abundance.profile(tetR.ko.iqlr.c0.c1.plus,tetR.ko.iqlr.c0.c2.plus,current.design)
  #antibiotic-free WT library
  wt.untreated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.minus,wt.ko.iqlr.c0.c2.minus,current.design)
  #tetracycline-treated WT library
  wt.tetracycline.treated.vector<-delta.abundance.profile(wt.ko.iqlr.c0.c1.plus,wt.ko.iqlr.c0.c2.plus,current.design)
  #common y-axis range for the four profiles (missing values are ignored)
  profile.range<-range(c(tetR.untreated.vector,tetR.tetracycline.treated.vector,wt.untreated.vector,wt.tetracycline.treated.vector),na.rm=TRUE)
  #the antibiotic-free TetR profile opens the panel; untreated profiles are dashed
  plot(x=cycles,y=tetR.untreated.vector,
       type="o",lty=2,col="red",ylim=profile.range,
       main=paste(current.gene,current.design,sep="-"),
       xlab="Cycle",ylab=expression(paste(Delta,"abundance (vs t0)")),
       cex=1.1,cex.lab=1.1,cex.axis=1.1)
  points(x=cycles,y=tetR.tetracycline.treated.vector,type="o",col="red4")
  points(x=cycles,y=wt.untreated.vector,type="o",lty=2,col="blue")
  points(x=cycles,y=wt.tetracycline.treated.vector,type="o",col="blue4")
}
```
3.7 Evaluate overlap between TetR dropouts at T3 and ALDEx2 results
```{r}
#identify genes labeled as undetectable at the end of the experiment with tetracycline (T3(+)),
#excluding genes that were already undetected at the beginning of the experiment (T0) or at the end of the experiment with no tetracycline (T3-TET)
genes.undetected.without.tetracycline<-union(tetR.depleted.genes[["T0"]],tetR.depleted.genes[["T3(-)"]])
tetR.depleted.genes.due.to.tetracycline<-setdiff(tetR.depleted.genes[["T3(+)"]],genes.undetected.without.tetracycline)
#overlap with TetR-specific genes with deleterious deletions
overlapping.genes<-intersect(tetR.depleted.genes.due.to.tetracycline,tetR.unique.deleterious.genes)
print(paste("There are",length(overlapping.genes),"genes in common"))
#perform hypergeometric test
#define gene universe: KO designs included in the ALDEx2 comparisons for T1(+TET) vs T0 and T2(+TET) vs T0,
#without the two control KO designs that are not associated with any gene
aldex2.designs.included.in.comparisons<-setdiff(union(rownames(tetR.ko.iqlr.c0.c1.plus),rownames(tetR.ko.iqlr.c0.c2.plus)),
                                                c("500955653","500955654"))
#genes associated with those KO designs
gene.universe.for.test<-unique(tetR.ko.library.count.matrix.with.gene.info[aldex2.designs.included.in.comparisons,"gene"])
#arguments of the hypergeometric test
#q: size of the observed overlap
q<-length(overlapping.genes)
#m: genes of the universe that were depleted due to tetracycline
m<-length(intersect(gene.universe.for.test,tetR.depleted.genes.due.to.tetracycline))
#n: remaining genes of the universe
n<-length(gene.universe.for.test)-m
#k: TetR-specific deleterious genes that belong to the universe
k<-length(intersect(tetR.unique.deleterious.genes,gene.universe.for.test))
#upper tail evaluated at q-1, i.e. the probability of an overlap of at least q genes
pvalue.overlap<-phyper(q-1,m,n,k,lower.tail=FALSE)
print(paste("Overlap P-value:",round(pvalue.overlap,digits=3)))
```
3.8 Create individual tables for Data Set S2 (with information about undetectable genes)
```{r}
#alternative function to convert gene names into corresponding loci
#this function uses E. coli genome annotation to extract locus tag
#arguments:
#  geneNames: vector of gene names (standard symbols or synonyms)
#  genome.file: CSV file with (at least) the columns "Symbol" and "Aliases";
#               "Aliases" is a comma-separated list whose first entry is returned as the locus tag
#value: character vector with exactly one entry per input name, in the same order
#       a name is returned unchanged when it cannot be resolved to a single row of the annotation
#       (not found, or matching more than one row either as symbol or as alias)
#NOTE(review): synonyms are searched with grep(), i.e., as a regular expression matched anywhere
#in the "Aliases" string, so a short name can also hit longer aliases that contain it
convert.gene.name.to.locus.tag<-function(geneNames,genome.file="../Data/Miscellaneous_files/ecoli_gene_ids.csv")
{
  #read E. coli genome information
  ecoli.genome<-read.csv(genome.file,header=TRUE)
  #as.character() keeps strsplit() working if the column was read as a factor
  gene.aliases<-as.character(ecoli.genome$Aliases)
  #first element of the comma-separated alias list of a given row
  first.alias<-function(position)
  {
    strsplit(gene.aliases[position],split = ",")[[1]][1]
  }
  #define locus tag for each gene of the input set
  output<-vapply(as.character(geneNames),function(g)
  {
    #first, evaluate if the current name is the standard gene symbol
    symbol.position<-which(ecoli.genome$Symbol == g)
    if(length(symbol.position)==1)
    {
      return(first.alias(symbol.position))
    }
    #second, evaluate if the current name is a synonym (only when it is not a symbol at all)
    if(length(symbol.position)==0)
    {
      alias.position<-grep(g,gene.aliases)
      if(length(alias.position)==1)
      {
        return(first.alias(alias.position))
      }
    }
    #not found or ambiguous: keep the original gene name
    #this includes symbols present in more than one row, which previously produced no entry at all
    #and left the output shorter than the input
    g
  },character(1),USE.NAMES=FALSE)
  output
}
#write one CSV file per entry of tetR.depleted.genes (seven entries) with the depleted genes and their loci
for(i in 1:7)
{
  depleted.gene.names<-tetR.depleted.genes[[i]]
  #two-column table: gene name and the locus tag returned by the conversion function
  temporal.table<-cbind(depleted.gene.names,convert.gene.name.to.locus.tag(depleted.gene.names))
  colnames(temporal.table)<-c("Gene Name","Locus")
  output.file<-paste0("../../Supplement/Dataset S2/TetR_",names(tetR.depleted.genes)[i],"_depleted_genes.csv")
  write.csv(temporal.table,file=output.file,quote=FALSE,row.names=FALSE)
}
#7x7 table with the number of depleted genes shared by every pair of entries
#(the diagonal therefore holds the number of distinct depleted genes of each entry)
tetR.depleted.genes.comparison.table<-matrix(0,nrow=7,ncol=7,
                                             dimnames=list(names(tetR.depleted.genes),names(tetR.depleted.genes)))
for(row.index in 1:7)
{
  for(column.index in 1:7)
  {
    shared.genes<-intersect(tetR.depleted.genes[[row.index]],tetR.depleted.genes[[column.index]])
    tetR.depleted.genes.comparison.table[row.index,column.index]<-length(shared.genes)
  }
}
write.csv(tetR.depleted.genes.comparison.table,file="../../Supplement/Dataset S2/TetR_gene_depletion_comparison.csv")
#same two outputs for the WT experiments
for(i in 1:7)
{
  depleted.gene.names<-wt.depleted.genes[[i]]
  temporal.table<-cbind(depleted.gene.names,convert.gene.name.to.locus.tag(depleted.gene.names))
  colnames(temporal.table)<-c("Gene Name","Locus")
  output.file<-paste0("../../Supplement/Dataset S2/WT_",names(wt.depleted.genes)[i],"_depleted_genes.csv")
  write.csv(temporal.table,file=output.file,quote=FALSE,row.names=FALSE)
}
wt.depleted.genes.comparison.table<-matrix(0,nrow=7,ncol=7,
                                           dimnames=list(names(wt.depleted.genes),names(wt.depleted.genes)))
for(row.index in 1:7)
{
  for(column.index in 1:7)
  {
    shared.genes<-intersect(wt.depleted.genes[[row.index]],wt.depleted.genes[[column.index]])
    wt.depleted.genes.comparison.table[row.index,column.index]<-length(shared.genes)
  }
}
write.csv(wt.depleted.genes.comparison.table,file="../../Supplement/Dataset S2/WT_gene_depletion_comparison.csv")
```
3.9 Create single files for Data Set S1
```{r}
#write CSV files with raw read counts for KO designs in WT and TetR KO libraries
#each table starts with the design ID and its associated gene, followed by the read count columns
#only the first six column names are overwritten; the remaining ones keep the names of the count matrix
tetR.design.ids<-rownames(tetR.ko.read.counts.numeric)
temporal.table<-cbind(tetR.design.ids,
                      tetR.ko.library.count.matrix.with.gene.info[tetR.design.ids,"gene"],
                      tetR.ko.read.counts.numeric)
colnames(temporal.table)[1:6]<-c("Design ID","Gene","C0-a","C0-b","C0-c","C0-d")
write.csv(temporal.table,file="../../Supplement/Dataset S1/TetR_raw_read_counts.csv",quote=FALSE,row.names=FALSE)
#repeat for WT
wt.design.ids<-rownames(wt.ko.read.counts.numeric)
temporal.table<-cbind(wt.design.ids,
                      wt.ko.library.count.matrix.with.gene.info[wt.design.ids,"gene"],
                      wt.ko.read.counts.numeric)
colnames(temporal.table)[1:6]<-c("Design ID","Gene","C0-a","C0-b","C0-c","C0-d")
write.csv(temporal.table,file="../../Supplement/Dataset S1/WT_raw_read_counts.csv",quote=FALSE,row.names=FALSE)
#save CSV files with ALDEx2 output
#each list name is the file name (without extension) used for the corresponding table
aldex2.tables.to.export<-list("TetR_t1+tet_vs_t0"=tetR.ko.iqlr.c0.c1.plus,
                              "TetR_t1-tet_vs_t0"=tetR.ko.iqlr.c0.c1.minus,
                              "TetR_t2+tet_vs_t0"=tetR.ko.iqlr.c0.c2.plus,
                              "TetR_t2-tet_vs_t0"=tetR.ko.iqlr.c0.c2.minus,
                              "WT_t1+tet_vs_t0"=wt.ko.iqlr.c0.c1.plus,
                              "WT_t1-tet_vs_t0"=wt.ko.iqlr.c0.c1.minus,
                              "WT_t2+tet_vs_t0"=wt.ko.iqlr.c0.c2.plus,
                              "WT_t2-tet_vs_t0"=wt.ko.iqlr.c0.c2.minus)
for(table.name in names(aldex2.tables.to.export))
{
  #row names (KO design IDs) are written as the first column
  write.csv(aldex2.tables.to.export[[table.name]],
            file=paste0("../../Supplement/Dataset S1/",table.name,".csv"))
}
#save CSV files with information of all genes affecting fitness in the TetR KO library
#this leverages previously constructed matrix that compiled effect of gene deletions
#(entries: 1 = positive effect, -1 = negative effect, 0 = no effect)
#gene names are taken from the row names so that they are available even when a single gene is selected
tetR.rows.impacting.fitness<-which(input.matrix.for.heatmap[,"TetR"]!=0)
tetR.genes.impacting.fitness<-rownames(input.matrix.for.heatmap)[tetR.rows.impacting.fitness]
tetR.gene.KO.fitness.effect<-setNames(input.matrix.for.heatmap[tetR.rows.impacting.fitness,"TetR"],tetR.genes.impacting.fitness)
#label the direction of the effect; any value other than 1 or -1 is reported as NA
#so that the label vector always stays aligned with the gene vector
tetR.deletion.effect<-unname(ifelse(tetR.gene.KO.fitness.effect==1,"Positive",
                                    ifelse(tetR.gene.KO.fitness.effect==-1,"Negative",NA_character_)))
#a gene is TetR-specific when its deletion has no effect (0) in the WT library
tetR.specificity<-unname(ifelse(input.matrix.for.heatmap[tetR.genes.impacting.fitness,"WT"]==0,"Yes","No"))
temporal.table<-cbind(tetR.genes.impacting.fitness,tetR.deletion.effect,tetR.specificity)
colnames(temporal.table)<-c("Gene","Deletion effect","TetR-specific?")
#sort by gene name; drop=FALSE keeps the table two-dimensional when it has a single row
temporal.table<-temporal.table[order(temporal.table[,"Gene"]),,drop=FALSE]
write.csv(file="../../Supplement/Dataset S1/TetR_genes_impacting_fitness.csv",temporal.table,quote = FALSE,row.names = FALSE)
#create similar table for WT KO library
wt.rows.impacting.fitness<-which(input.matrix.for.heatmap[,"WT"]!=0)
wt.genes.impacting.fitness<-rownames(input.matrix.for.heatmap)[wt.rows.impacting.fitness]
wt.gene.KO.fitness.effect<-setNames(input.matrix.for.heatmap[wt.rows.impacting.fitness,"WT"],wt.genes.impacting.fitness)
wt.deletion.effect<-unname(ifelse(wt.gene.KO.fitness.effect==1,"Positive",
                                  ifelse(wt.gene.KO.fitness.effect==-1,"Negative",NA_character_)))
#a gene is WT-specific when its deletion has no effect (0) in the TetR library
wt.specificity<-unname(ifelse(input.matrix.for.heatmap[wt.genes.impacting.fitness,"TetR"]==0,"Yes","No"))
temporal.table<-cbind(wt.genes.impacting.fitness,wt.deletion.effect,wt.specificity)
colnames(temporal.table)<-c("Gene","Deletion effect","WT-specific?")
temporal.table<-temporal.table[order(temporal.table[,"Gene"]),,drop=FALSE]
write.csv(file="../../Supplement/Dataset S1/WT_genes_impacting_fitness.csv",temporal.table,quote = FALSE,row.names = FALSE)
```
4.1 Fig. 4A
```{r}
#function to change format of time data so it can be read by Growthcurver
#argument:
#  time.vector: vector of times written as "HH:MM:SS"
#value: unnamed numeric vector (same length and order as the input) with the time in decimal hours,
#       rounded to three decimals; an empty input gives an empty numeric vector
#       entries without the three ":"-separated fields yield NA
change.time.format<-function(time.vector)
{
  #split every entry into its hour, minute and second fields
  #as.character() allows the time column to be passed as a factor
  time.fields<-strsplit(as.character(time.vector),split = ":")
  #hours + (seconds + 60*minutes)/3600
  output<-vapply(time.fields,function(current.time.point)
  {
    as.numeric(current.time.point[1]) + ((as.numeric(current.time.point[3]) + (60*as.numeric(current.time.point[2])))/3600)
  },numeric(1),USE.NAMES=FALSE)
  round(output,digits = 3)
}
#function to estimate the average and standard deviation (sd) of each strain (taking into account all of its replicates) at each time point
#argument:
#  growth.df: data frame with three columns: "Time","Strain" and "OD". The latter is the OD readings collected in the Bioscreen experiment
#value: data frame with one row per time point/strain combination and the columns
#       "Time" (numeric), "Strain" (character), "OD" (mean of the readings) and "sd" (their standard deviation)
#       rows are ordered by time point (order of first appearance) and, within each time point, by strain
#the summary values are kept as numbers from start to end: they are not passed through a character matrix,
#which would truncate them to 15 significant digits
compute.mean.and.sd<-function(growth.df)
{
  measured.time.points<-unique(growth.df$Time)
  #compare and convert by label, not by internal code, if time was stored as a factor
  if(is.factor(measured.time.points))
  {
    measured.time.points<-as.character(measured.time.points)
  }
  measured.strains<-as.character(unique(growth.df$Strain))
  #expand.grid varies its first argument fastest: strains change within each time point
  combinations<-expand.grid(Strain=measured.strains,Time=measured.time.points,stringsAsFactors=FALSE)
  combination.rows<-seq_len(nrow(combinations))
  #OD readings of all replicates for one time point/strain combination
  readings.of<-function(row)
  {
    growth.df$OD[which(growth.df$Strain==combinations$Strain[row] & growth.df$Time==combinations$Time[row])]
  }
  output<-data.frame(Time=as.numeric(combinations$Time),
                     Strain=combinations$Strain,
                     OD=vapply(combination.rows,function(row){mean(readings.of(row))},numeric(1)),
                     sd=vapply(combination.rows,function(row){sd(readings.of(row))},numeric(1)),
                     stringsAsFactors=FALSE)
  output
}
#read OD readings from Bioscreen experiment to generate corresponding growth curves
#there were five strains in the experiment
#for each strain/tetracycline concentration, there are three biological replicates (each one with two replicates)
#bioscreen experiment was performed for a total of 48h
#the first column of the file (time stamps) becomes the row names; every other column is a well
growth.data<-read.csv("../Data/Fitness_BioscreenC/20220504_formatted_data.csv",row.names=1)
#convert the time stamps to decimal hours
time.data<-change.time.format(rownames(growth.data))
#label the rows with the time (in hours) rounded to two decimals
rownames(growth.data)<-round(time.data,digits = 2)
#normalize data by subtracting minimum OD reading per well
#i.e., the smallest reading of every column is subtracted from that same column
minimum.reading.per.well<-sapply(growth.data,min)
growth.data.normalized<-sweep(as.matrix(growth.data),2,minimum.reading.per.well,FUN="-")
#the normalized matrix carries the same row (time) and column (well) names as the raw table
dimnames(growth.data.normalized)<-list(rownames(growth.data),colnames(growth.data))
#these are the tetracycline concentrations included in the experiment
tetracycline.concentrations<-seq(0,24,by=4) # ug/ml
#labels for the five strains in the dataset
strains<-c("WT","WTarcA","TetR","TetRarcA","pRB3arcA")
#color to be used for each strain
strain.colors<-c(WT="#66C2A5",WTarcA="#FC8D62",TetR="#8DA0CB",TetRarcA="#E78AC3",pRB3arcA="#A6D854")
#the OD data has been formatted such as the name of each well contains the information about the strain and the tetracycline concentration
#for example: TetR_20 means that the well was inoculated with the TetR strain and 20 ug/ml of tetracycline
#create Fig. 4A left panel (ie, antibiotic-free)
#collect one three-column block (time, OD, strain) per replicate well and stack them afterwards
antibiotic.free.replicate.blocks<-list()
for(s in strains)
{
  #wells of the current strain without tetracycline are those whose name contains "<strain>_0"
  current.strain.concentration<-paste(s,0,sep="_")
  replicates.positions<-grep(current.strain.concentration,colnames(growth.data.normalized))
  for(r in replicates.positions)
  {
    #only the first 50 readings are used (first 24h)
    antibiotic.free.replicate.blocks[[length(antibiotic.free.replicate.blocks)+1]]<-cbind(rownames(growth.data.normalized)[1:50],
                                                                                          growth.data.normalized[1:50,r],
                                                                                          rep(s,50))
  }
}
antibiotic.free.cultures.df<-do.call(rbind,antibiotic.free.replicate.blocks)
colnames(antibiotic.free.cultures.df)<-c("Time","OD","Strain")
#the stacked blocks form a character matrix: convert it to a data frame and restore the column types
antibiotic.free.cultures.df<-as.data.frame(antibiotic.free.cultures.df)
antibiotic.free.cultures.df[["Strain"]]<-as.factor(antibiotic.free.cultures.df[["Strain"]])
antibiotic.free.cultures.df[["Time"]]<-as.numeric(as.vector(antibiotic.free.cultures.df[["Time"]]))
antibiotic.free.cultures.df[["OD"]]<-as.numeric(as.vector(antibiotic.free.cultures.df[["OD"]]))
#compute mean and sd per strain/time point combination
antibiotic.free.cultures.df.v2<-compute.mean.and.sd(antibiotic.free.cultures.df)
#create ggplot object to generate final figure: mean OD per strain with +/- sd error bars
antibiotic.free.subpanel<-ggplot(antibiotic.free.cultures.df.v2,
                                 aes(x=Time,y=OD,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=OD-sd,ymax=OD+sd),width=0.75,alpha=0.75)+
  theme_classic()
#create Fig. 4A right panel
#treatment with 20 ug/ml of tetracycline
tet.concentration<-20
#only TetR, TetR DarcA and TetR DarcA + pRB3-arcA (episomal complemented strain) were treated with tetracycline
#collect one three-column block (time, OD, strain) per replicate well and stack them afterwards
tetracycline.treated.replicate.blocks<-list()
for(s in strains[3:5])
{
  #wells of the current strain/concentration are those whose name contains "<strain>_<concentration>"
  current.strain.concentration<-paste(s,tet.concentration,sep="_")
  replicates.positions<-grep(current.strain.concentration,colnames(growth.data))
  for(r in replicates.positions)
  {
    #all the readings are used here (full 48h)
    tetracycline.treated.replicate.blocks[[length(tetracycline.treated.replicate.blocks)+1]]<-cbind(rownames(growth.data.normalized),
                                                                                                    growth.data.normalized[,r],
                                                                                                    rep(s,nrow(growth.data.normalized)))
  }
}
tetracycline.treated.cultures.df<-do.call(rbind,tetracycline.treated.replicate.blocks)
colnames(tetracycline.treated.cultures.df)<-c("Time","OD","Strain")
#the stacked blocks form a character matrix: convert it to a data frame and restore the column types
tetracycline.treated.cultures.df<-as.data.frame(tetracycline.treated.cultures.df)
tetracycline.treated.cultures.df[["Strain"]]<-as.factor(tetracycline.treated.cultures.df[["Strain"]])
tetracycline.treated.cultures.df[["Time"]]<-as.numeric(as.vector(tetracycline.treated.cultures.df[["Time"]]))
tetracycline.treated.cultures.df[["OD"]]<-as.numeric(as.vector(tetracycline.treated.cultures.df[["OD"]]))
#compute mean and sd per strain/time point combination
tetracycline.treated.cultures.df.v2<-compute.mean.and.sd(tetracycline.treated.cultures.df)
#create ggplot object to generate final figure: mean OD per strain with +/- sd error bars
tetracycline.treated.subpanel<-ggplot(tetracycline.treated.cultures.df.v2,
                                      aes(x=Time,y=OD,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=OD-sd,ymax=OD+sd),width=0.75,alpha=0.15)+
  theme_classic()
#generate Fig. 4A: both panels side by side, sharing axis labels and text formatting
fig4a.axis.theme<-theme(axis.text=element_text(size=10),axis.title = element_text(size=10,face="bold"))
fig4a.left.panel<-antibiotic.free.subpanel+
  scale_color_manual(values=strain.colors)+
  fig4a.axis.theme+
  xlab("Time (h)")+ylab("OD600")+ggtitle("[TET]=0")
fig4a.right.panel<-tetracycline.treated.subpanel+
  scale_color_manual(values=strain.colors[3:5])+
  fig4a.axis.theme+
  xlab("Time (h)")+ylab("OD600")+ggtitle("[TET]=20ug/ml")
grid.arrange(fig4a.left.panel,fig4a.right.panel,nrow=1)
```
4.2 Fig. S3
```{r}
#plot growth curves of WT and TetR in antibiotic-free cultures
#keep only the rows of the two strains of interest
wt.and.tetR.rows<-which(antibiotic.free.cultures.df.v2$Strain %in% c("WT","TetR"))
#create ggplot object: mean OD per strain with +/- sd error bars
temporal.plot<-ggplot(antibiotic.free.cultures.df.v2[wt.and.tetR.rows,],
                      aes(x=Time,y=OD,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=OD-sd,ymax=OD+sd),width=0.75,alpha=0.5)+
  theme_classic()
print(temporal.plot+
        scale_color_manual(values=strain.colors[c(1,3)])+
        theme(axis.text=element_text(size=10),axis.title = element_text(size=10,face="bold"))+
        xlab("Time (h)")+ylab("OD600")+ggtitle("Fig. S3"))
#estimate fitness parameters shown in Fig. S3 with Growthcurver (for the first 24h of growth)
time.vector<-as.vector(time.data)
#Growthcurver input: first column with the time, then one column per well (first 50 readings only)
input.matrix.growthcurver<-as.data.frame(cbind(time.data[1:50],growth.data.normalized[1:50,]))
colnames(input.matrix.growthcurver)[1]<-"time"
#run Growthcurver
growthcurver.output<-SummarizeGrowthByPlate(input.matrix.growthcurver)
#antibiotic-free wells of each strain are those whose sample name contains "WT_0" or "TetR_0"
wt.antibiotic.free.samples<-grep("WT_0",growthcurver.output$sample)
tetR.antibiotic.free.samples<-grep("TetR_0",growthcurver.output$sample)
#print estimated max growth rate (mu) for WT and TetR in antibiotic-free medium (average over wells)
print(paste0("mu (WT in LB without tetracycline) was:",round(mean(growthcurver.output[wt.antibiotic.free.samples,"r"]),digits=2)))
print(paste0("mu (TetR in LB without tetracycline) was:",round(mean(growthcurver.output[tetR.antibiotic.free.samples,"r"]),digits=2)))
#print estimated area under the growth curve (AUC) values for WT and TetR in antibiotic-free medium (average over wells)
print(paste0("AUC (WT in LB without tetracycline) was:",round(mean(growthcurver.output[wt.antibiotic.free.samples,"auc_e"]),digits=2)))
print(paste0("AUC (TetR in LB without tetracycline) was:",round(mean(growthcurver.output[tetR.antibiotic.free.samples,"auc_e"]),digits=2)))
```
4.3 Fig. 4B-C
```{r}
#read OD data collected in experiment to evaluate the effect of arcA deletion in WT and TetR over three growth cycles
#in each cycle, cultures were started at an OD600 of 0.1 and grown to an OD600 of 1.0
#and thereafter diluted in fresh medium to OD600 of 0.1 to start a new cycle of growth
#three replicates per strain/antibiotic concentration were included
#last two columns are LB controls
cyclic.growth.od<-read.csv("../Data/cyclic_growth/macrodilution_cyclic_growth_experiment_OD_data.csv",header=TRUE)
#change format of time as before
cyclic.growth.od$Time<-change.time.format(cyclic.growth.od$Time)
par(mfrow=c(1,3))
#OD columns are named <strain>_<tetracycline concentration>_<replicate>
#replicates A, B and C are drawn with point symbols 0, 1 and 2, respectively
replicate.symbols<-c(A=0,B=1,C=2)
#title and x-axis range of each panel, indexed by the tetracycline concentration used in the column names
panel.titles<-c("0"="[TET]=0","20"="[TET]=20 ug/ml")
panel.time.ranges<-list("0"=c(0,10),"20"=c(0,40))
for(tet.label in names(panel.titles))
{
  #the first curve of a panel opens the plot; the remaining ones are added on top of it
  panel.started<-FALSE
  #individual growth curves of TetR replicates first, then those of TetR DarcA
  for(strain.label in c("TetR","TetRarcA"))
  {
    for(replicate.label in names(replicate.symbols))
    {
      od.readings<-cyclic.growth.od[[paste(strain.label,tet.label,replicate.label,sep="_")]]
      #skip the time points without a reading for this culture
      measured.points<-which(od.readings!="")
      if(!panel.started)
      {
        plot(x=cyclic.growth.od$Time[measured.points],y=od.readings[measured.points],
             col=strain.colors[strain.label],pch=replicate.symbols[[replicate.label]],type="o",
             xlab="Time (h)",ylab="OD600",
             xlim=panel.time.ranges[[tet.label]],ylim=c(0,1.1),
             cex.axis=1.5,cex.lab=1.5,main=panel.titles[[tet.label]])
        panel.started<-TRUE
      }
      else
      {
        points(x=cyclic.growth.od$Time[measured.points],y=od.readings[measured.points],
               col=strain.colors[strain.label],pch=replicate.symbols[[replicate.label]],type="o")
      }
    }
  }
  #reference line at OD600 = 1
  abline(h=1,col="darkgrey",lty=2)
}
#add legend (drawn in the third, empty panel)
plot.new()
legend("center",col=strain.colors[c("TetR","TetRarcA")],legend = c("TetR",expression(paste("TetR ",Delta, "arcA"))),lty=1)
```
4.4 Fig. S6
```{r}
#create same plot for WT and WT DarcA
par(mfrow=c(1,3))
#OD columns are named <strain>_<tetracycline concentration>_<replicate>
#replicates A, B and C are drawn with point symbols 0, 1 and 2, respectively
replicate.symbols<-c(A=0,B=1,C=2)
#title and x-axis range of each panel, indexed by the tetracycline concentration used in the column names
panel.titles<-c("0"="[TET]=0","0.75"="[TET]=0.75 ug/ml")
panel.time.ranges<-list("0"=c(0,7),"0.75"=c(0,30))
for(tet.label in names(panel.titles))
{
  #the first curve of a panel opens the plot; the remaining ones are added on top of it
  panel.started<-FALSE
  #individual growth curves of WT replicates first, then those of WT DarcA
  for(strain.label in c("WT","WTarcA"))
  {
    for(replicate.label in names(replicate.symbols))
    {
      od.readings<-cyclic.growth.od[[paste(strain.label,tet.label,replicate.label,sep="_")]]
      #skip the time points without a reading for this culture
      measured.points<-which(od.readings!="")
      if(!panel.started)
      {
        plot(x=cyclic.growth.od$Time[measured.points],y=od.readings[measured.points],
             col=strain.colors[strain.label],pch=replicate.symbols[[replicate.label]],type="o",
             xlab="Time (h)",ylab="OD600",
             xlim=panel.time.ranges[[tet.label]],ylim=c(0,1.1),
             cex.axis=1.5,cex.lab=1.5,main=panel.titles[[tet.label]])
        panel.started<-TRUE
      }
      else
      {
        points(x=cyclic.growth.od$Time[measured.points],y=od.readings[measured.points],
               col=strain.colors[strain.label],pch=replicate.symbols[[replicate.label]],type="o")
      }
    }
  }
  #reference line at OD600 = 1
  abline(h=1,col="darkgrey",lty=2)
}
#add legend (drawn in the third, empty panel)
plot.new()
legend("center",col=strain.colors[c("WT","WTarcA")],legend = c("WT",expression(paste("WT ",Delta, "arcA"))),lty=1)
```
4.5 Fig. 4D
```{r}
#run Growthcurver analysis again
#Growthcurver input: first column with the time, then one column per well
input.matrix.growthcurver<-as.data.frame(cbind(time.data,growth.data.normalized))
colnames(input.matrix.growthcurver)[1]<-"time"
#run Growthcurver for the full 48h
growthcurver.output<-SummarizeGrowthByPlate(input.matrix.growthcurver)
#helper: mean (first column) and SD (second column) of a Growthcurver estimate for each tetracycline concentration
#the estimates are read in seven consecutive groups of six wells
#(assumes wells are sorted by concentration with six wells each -- TODO confirm against the input file)
summarize.replicates.per.concentration<-function(estimates)
{
  wells.of.group<-function(x){((x-1)*6)+1:6}
  cbind(vapply(1:7,function(x){mean(estimates[wells.of.group(x)])},numeric(1)),
        vapply(1:7,function(x){sd(estimates[wells.of.group(x)])},numeric(1)))
}
#create Fig. 4D (left panel)
#Growthcurver-estimated AUCs ("auc_e") of TetR, TetR DarcA and TetR DarcA + pRB3-arcA (episomal complemented strain)
#samples of each strain are identified by the prefix of their name
tetR.samples.positions<-grep("TetR_",growthcurver.output[,"sample"])
tetR.auc<-growthcurver.output[tetR.samples.positions,"auc_e"]
names(tetR.auc)<-growthcurver.output[tetR.samples.positions,"sample"]
tetR.arcA.samples.positions<-grep("TetRarcA_",growthcurver.output[,"sample"])
tetR.arcA.auc<-growthcurver.output[tetR.arcA.samples.positions,"auc_e"]
names(tetR.arcA.auc)<-growthcurver.output[tetR.arcA.samples.positions,"sample"]
tetR.arcA.pRB3.samples.positions<-grep("pRB3arcA_",growthcurver.output[,"sample"])
tetR.arcA.pRB3.auc<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"auc_e"]
names(tetR.arcA.pRB3.auc)<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"sample"]
#stack the per-concentration summaries of the three strains (seven rows per strain)
auc.df<-rbind(summarize.replicates.per.concentration(tetR.auc),
              summarize.replicates.per.concentration(tetR.arcA.auc),
              summarize.replicates.per.concentration(tetR.arcA.pRB3.auc))
#labels of the stacked rows: concentrations cycle within each strain
auc.tet.concentrations<-factor(rep(tetracycline.concentrations,3),levels = seq(0,24,by=4))
auc.strains<-factor(rep(c("TetR","TetRarcA","pRB3arcA"),each=7),levels = c("TetR","TetRarcA","pRB3arcA"))
#add columns with tetracycline concentration and strain information
auc.df<-cbind(as.data.frame(auc.df),auc.tet.concentrations,auc.strains)
colnames(auc.df)<-c("auc","sd","Concentration","Strain")
#create ggplot object: mean AUC per concentration with +/- sd error bars
plot.auc.estimates<-ggplot(auc.df,aes(x=Concentration,y=auc,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=auc-sd,ymax=auc+sd),width=0.75)+
  theme_classic()+
  theme(aspect.ratio = 1)
#similar analysis for lag phase proxy (Fig. 4D right panel)
#point of inflection (ie, time point for 1/2 max OD) was used as a proxy for lag phase
#Growthcurver-estimated point of inflection ("t_mid") of the same three strains
tetR.lag<-growthcurver.output[tetR.samples.positions,"t_mid"]
names(tetR.lag)<-growthcurver.output[tetR.samples.positions,"sample"]
tetR.arcA.lag<-growthcurver.output[tetR.arcA.samples.positions,"t_mid"]
names(tetR.arcA.lag)<-growthcurver.output[tetR.arcA.samples.positions,"sample"]
tetR.arcA.pRB3.lag<-growthcurver.output[tetR.arcA.pRB3.samples.positions,"t_mid"]
#stack the per-concentration summaries of the three strains (seven rows per strain)
lag.df<-rbind(summarize.replicates.per.concentration(tetR.lag),
              summarize.replicates.per.concentration(tetR.arcA.lag),
              summarize.replicates.per.concentration(tetR.arcA.pRB3.lag))
#add columns with tetracycline concentration and strain information
lag.df<-cbind(as.data.frame(lag.df),auc.tet.concentrations,auc.strains)
colnames(lag.df)<-c("lag","sd","Concentration","Strain")
#if estimated point of inflection was > 48h, then the value was set to 48 (and its sd to 0)
lag.beyond.experiment<-which(lag.df$lag>48)
lag.df$sd[lag.beyond.experiment]<-0
lag.df$lag[lag.beyond.experiment]<-48
#create ggplot object: mean point of inflection per concentration with +/- sd error bars
plot.lag.estimates<-ggplot(lag.df,aes(x=Concentration,y=lag,group=Strain,colour=Strain))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin=lag-sd,ymax=lag+sd),width=0.75)+
  theme_classic()+
  theme(aspect.ratio = 1)
#create Fig. 4D: both panels side by side, sharing colors and text/border formatting
fig4d.panel.theme<-theme(axis.text=element_text(size=10), axis.title = element_text(size=10),
                         panel.border = element_rect(colour = "black", fill=NA, size=0.5))
fig4d.left.panel<-plot.auc.estimates+
  scale_color_manual(values=strain.colors[3:5])+
  fig4d.panel.theme+
  xlab("[TET](ug/ml)")+ylab("AUC")+ggtitle("Fig. 4D")
fig4d.right.panel<-plot.lag.estimates+
  scale_color_manual(values=strain.colors[3:5])+
  fig4d.panel.theme+
  xlab("[TET](ug/ml)")+ylab("~Lag phase (h)")
grid.arrange(fig4d.left.panel,fig4d.right.panel,nrow=1)
```
4.6 Fig. 4E 
```{r}
#raw and processed data are available in the "Data/NADH_NAD/nadh_nad_measurement.xlsx" file
#results are the compilation of two independent experiments
#in each experiment, the NADH and NAD concentrations of WT, WT DarcA, TetR and TetR DarcA were measured with and without tetracycline (0.75 ug/ml for WT and WT DarcA, and 4 ug/ml for TetR and TetR DarcA)
#1st experiment, NADH/NAD ratios of cultures grown without tetracycline
wt.no.tetracycline.expt1 <- c(0.025735294, 0.052434457, 0.046875)
wt.arcA.no.tetracycline.expt1 <- c(0.071917808, 0.081632653, 0.070110701)
tetR.no.tetracycline.expt1 <- c(0.075117371, 0.081545064, 0.043715847)
tetR.arcA.no.tetracycline.expt1 <- c(0.380645161, 0.129943503, 0.197740113)
#1st experiment, NADH/NAD ratios of cultures grown with tetracycline
wt.with.tetracycline.expt1 <- c(0.05524861878, 0.02032520325, 0.02926829268)
wt.arcA.with.tetracycline.expt1 <- c(0.04545454545, 0.0351758794, 0.05803571429)
tetR.with.tetracycline.expt1 <- c(0.1631944444, 0.1936507937, 0.1032258065)
tetR.arcA.with.tetracycline.expt1 <- c(0.1302931596, 0.3312883436, 0.2218181818)
#2nd experiment, NADH/NAD ratios of cultures grown without tetracycline
wt.no.tetracycline.expt2 <- c(0.052816901, 0.069204152)
#no values are recorded here for WT arcA without tetracycline in the 2nd experiment
wt.arcA.no.tetracycline.expt2 <- c()
tetR.no.tetracycline.expt2 <- c(0.06185567, 0.052631579)
tetR.arcA.no.tetracycline.expt2 <- c(0.090425532, 0.142857143, 0.11409396)
#2nd experiment, NADH/NAD ratios of cultures grown with tetracycline
wt.with.tetracycline.expt2 <- c(0.074074074, 0.050458716)
wt.arcA.with.tetracycline.expt2 <- c(0.045801527, 0.036363636)
tetR.with.tetracycline.expt2 <- c(0.116935484, 0.120622568)
tetR.arcA.with.tetracycline.expt2 <- c(0.092050209, 0.055793991, 0.089622642)
#pool the replicates of both experiments (1st experiment first) for every strain/condition
wt.no.tetracycline <- c(wt.no.tetracycline.expt1, wt.no.tetracycline.expt2)
wt.with.tetracycline <- c(wt.with.tetracycline.expt1, wt.with.tetracycline.expt2)
wt.arcA.no.tetracycline <- c(wt.arcA.no.tetracycline.expt1, wt.arcA.no.tetracycline.expt2)
wt.arcA.with.tetracycline <- c(wt.arcA.with.tetracycline.expt1, wt.arcA.with.tetracycline.expt2)
tetR.no.tetracycline <- c(tetR.no.tetracycline.expt1, tetR.no.tetracycline.expt2)
tetR.with.tetracycline <- c(tetR.with.tetracycline.expt1, tetR.with.tetracycline.expt2)
tetR.arcA.no.tetracycline <- c(tetR.arcA.no.tetracycline.expt1, tetR.arcA.no.tetracycline.expt2)
tetR.arcA.with.tetracycline <- c(tetR.arcA.with.tetracycline.expt1, tetR.arcA.with.tetracycline.expt2)
#one box per strain/condition; for each strain the (-) box is followed by the (+) box
nadh.nad.groups <- list(wt.no.tetracycline, wt.with.tetracycline,
                        wt.arcA.no.tetracycline, wt.arcA.with.tetracycline,
                        tetR.no.tetracycline, tetR.with.tetracycline,
                        tetR.arcA.no.tetracycline, tetR.arcA.with.tetracycline)
nadh.nad.labels <- paste0(rep(c("WT","WT arcA","TetR","TetR arcA"), each=2), rep(c("(-)","(+)"), 4))
boxplot(nadh.nad.groups, col=rep(strain.colors[1:4], each=2), names=nadh.nad.labels, las=2,
        ylab="NADH/NAD", ylim=c(0,0.4), cex=.7, cex.lab=.7, cex.axis=.7, main="Fig. 4E")
#two-sample t-tests (t.test defaults) of every untreated strain against untreated WT
pvalue.wt.vs.wt.arcA <- t.test(wt.no.tetracycline, wt.arcA.no.tetracycline)$p.value
pvalue.wt.vs.tetR <- t.test(wt.no.tetracycline, tetR.no.tetracycline)$p.value
pvalue.wt.vs.tetR.arcA <- t.test(wt.no.tetracycline, tetR.arcA.no.tetracycline)$p.value
print(paste0("T-test p-value for WT(-) vs WT arcA(-):", pvalue.wt.vs.wt.arcA))
print(paste0("T-test p-value for WT(-) vs TetR(-):", pvalue.wt.vs.tetR))
print(paste0("T-test p-value for WT(-) vs TetR arcA(-):", pvalue.wt.vs.tetR.arcA))
#two-sample t-tests of untreated (-TET) vs treated (+TET) cultures within each strain
pvalue.wt.treatment <- t.test(wt.no.tetracycline, wt.with.tetracycline)$p.value
pvalue.wt.arcA.treatment <- t.test(wt.arcA.no.tetracycline, wt.arcA.with.tetracycline)$p.value
pvalue.tetR.treatment <- t.test(tetR.no.tetracycline, tetR.with.tetracycline)$p.value
pvalue.tetR.arcA.treatment <- t.test(tetR.arcA.no.tetracycline, tetR.arcA.with.tetracycline)$p.value
print(paste0("T-test p-value for WT(+) vs WT(-):", pvalue.wt.treatment))
print(paste0("T-test p-value for WT arcA(+) vs WT arcA(-):", pvalue.wt.arcA.treatment))
print(paste0("T-test p-value for TetR(+) vs TetR(-):", pvalue.tetR.treatment))
print(paste0("T-test p-value for TetR arcA(+) vs TetR arcA(-):", pvalue.tetR.arcA.treatment))
```
5.1 Fig. 5A
```{r}
#read metabolome similarity matrix from Campos & Zampieri 2019 (Molecular cell-Table S3)
#as indexed below, rows are drug treatments and columns are gene deletions
similarity.matrix<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/drug_gene_similarity_matrix.csv",row.names=1)
#read matrix with p-values for metabolic profile similarity from Campos & Zampieri 2019 (Molecular cell-Table S3)
pvalue.matrix<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/drug_gene_similarity_pvalue.csv",row.names=1)
#generate Fig. 5A (p-values are looked up by row name so that both axes refer to the same treatment)
plot(x=similarity.matrix[,"arcA"],y=-1*log10(pvalue.matrix[rownames(similarity.matrix),"arcA"]),xlab="Drug treatment-arcA KO similarity",ylab="-log10 p-value", main="Fig. 5A")
#highlight the two treatments more similar to the arcA deletion
points(x=similarity.matrix[grep("Cefpiramide",rownames(similarity.matrix)),"arcA"],y=-1*log10(pvalue.matrix[grep("Cefpiramide",rownames(pvalue.matrix)),"arcA"]),col="red",pch=19)
text(x=similarity.matrix[grep("Cefpiramide",rownames(similarity.matrix)),"arcA"]-0.02,y=-1*log10(pvalue.matrix[grep("Cefpiramide",rownames(pvalue.matrix)),"arcA"]),"Cefpiramide")
points(x=similarity.matrix[grep("Sertraline",rownames(similarity.matrix)),"arcA"],y=-1*log10(pvalue.matrix[grep("Sertraline",rownames(pvalue.matrix)),"arcA"]),col="darkblue",pch=19)
text(x=similarity.matrix[grep("Sertraline",rownames(similarity.matrix)),"arcA"],y=-1*log10(pvalue.matrix[grep("Sertraline",rownames(pvalue.matrix)),"arcA"])+0.3,"Sertraline")
#check position of arcA KO metabolic profile within ranking of all gene deletions for the top 2 compounds
#the ranking compares arcA against the other columns (gene deletions) of one treatment row,
#so the size of the ranking is the number of columns, not the number of rows (treatments)
number.of.gene.deletions<-ncol(similarity.matrix)
#count the gene deletions whose similarity to the treatment is strictly higher than that of arcA
arcA.sertraline.ranking<-length(which(as.numeric(similarity.matrix["Sertraline",]) > similarity.matrix["Sertraline","arcA"]))
print(paste("Position of arcA deletion in similarity ranking with respect to Sertraline treatment profile was:",arcA.sertraline.ranking+1,"/",number.of.gene.deletions,sep=""))
arcA.cefpiramide.ranking<-length(which(as.numeric(similarity.matrix["Cefpiramide",]) > similarity.matrix["Cefpiramide","arcA"]))
print(paste("Position of arcA deletion in similarity ranking with respect to Cefpiramide treatment profile was:",arcA.cefpiramide.ranking+1,"/",number.of.gene.deletions,sep=""))
```
5.2 Fig. S7
```{r}
#read metabolic profiles of treated E. coli (z-scores) 
#the dataset contains E. coli metabolic profiles (969 metabolites) during treatment with 1,279 compounds
#data sourced from Campos & Zampieri 2019 (Molecular cell-Table S1)
zampieri.metabolic.profiles<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/zscores_metabolic_profile.csv",header=TRUE)
#plot distribution of Z-scores in sertraline treatment (left panel; the cefpiramide histogram below fills the right panel)
par(mfrow=c(1,2))
hist(zampieri.metabolic.profiles[,"Sertraline"],breaks=100,xlab="Metabolite z-score",main="Sertraline treatment")
#define threshold to consider a metabolite as responsive to sertraline (top 10% absolute value)
sertraline.zscore.threshold<-as.numeric(quantile(abs(zampieri.metabolic.profiles[,"Sertraline"]),0.9,na.rm = TRUE))
#define metabolites whose concentration was altered by sertraline
sertraline.treatment.differential.metabolites<-zampieri.metabolic.profiles[which(abs(zampieri.metabolic.profiles[,"Sertraline"])>= sertraline.zscore.threshold),"Metabolite_ids"]
#remove duplication (converted to character first so the result does not depend on how read.csv typed the column)
sertraline.treatment.differential.metabolites<-unique(as.character(sertraline.treatment.differential.metabolites))
#remove empty entries by value (missing or blank id) instead of by a hard-coded position,
#which would silently drop a real metabolite if the row order or the threshold ever changed
sertraline.treatment.differential.metabolites<-sertraline.treatment.differential.metabolites[!is.na(sertraline.treatment.differential.metabolites) & nzchar(trimws(sertraline.treatment.differential.metabolites))]
#split entries with multiple KEGG IDs (ids within an entry are separated by "~")
sertraline.treatment.differential.metabolites<-sapply(seq_along(sertraline.treatment.differential.metabolites),function(x){strsplit(sertraline.treatment.differential.metabolites[x],split="~")[[1]]})
#save file with KEGG id of altered metabolites
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/differential_metabolites_sertraline_08302022.csv",array(as.character(unlist(sertraline.treatment.differential.metabolites))))
#repeat analysis for cefpiramide data
#plot distribution of Z-scores in cefpiramide treatment
hist(zampieri.metabolic.profiles[,"Cefpiramide"],breaks=100,xlab="Metabolite z-score",main="Cefpiramide treatment")
#define threshold to consider a metabolite as responsive to cefpiramide (top 10% absolute value)
cefpiramide.zscore.threshold<-as.numeric(quantile(abs(zampieri.metabolic.profiles[,"Cefpiramide"]),0.9,na.rm = TRUE))
#define metabolites whose concentration was altered by cefpiramide
cefpiramide.treatment.differential.metabolites<-zampieri.metabolic.profiles[which(abs(zampieri.metabolic.profiles[,"Cefpiramide"])>= cefpiramide.zscore.threshold),"Metabolite_ids"]
#remove duplication
cefpiramide.treatment.differential.metabolites<-unique(as.character(cefpiramide.treatment.differential.metabolites))
#remove empty entries by value (see the sertraline case above in this chunk)
cefpiramide.treatment.differential.metabolites<-cefpiramide.treatment.differential.metabolites[!is.na(cefpiramide.treatment.differential.metabolites) & nzchar(trimws(cefpiramide.treatment.differential.metabolites))]
#split entries with multiple KEGG IDs
cefpiramide.treatment.differential.metabolites<-sapply(seq_along(cefpiramide.treatment.differential.metabolites),function(x){strsplit(cefpiramide.treatment.differential.metabolites[x],split="~")[[1]]})
#save IDs of altered metabolites
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/differential_metabolites_cefpiramide_related_08302022.csv",array(as.character(unlist(cefpiramide.treatment.differential.metabolites))))
#differential expression analysis of the arcA KO transcriptomes from Park et al. 2013 (PLoS Genetics)
#normalized GEO data (accession ID: GSE46415); the first column of the file provides the row names
arcA.ko.park.transcriptomes<-read.csv("../Data/Differential_expression_analysis/GEO_Park2013/arcA_ko_transcriptomics_Park et al_2013.csv",header=TRUE,row.names=1)
#keep only the part of each row name that precedes "0710" (the locus tag)
park.original.row.names<-rownames(arcA.ko.park.transcriptomes)
rownames(arcA.ko.park.transcriptomes)<-vapply(strsplit(park.original.row.names,split="0710"),
                                              function(name.parts){name.parts[1]},
                                              character(1))
#work on log2-transformed values
arcA.ko.park.transcriptomes<-log2(arcA.ko.park.transcriptomes)
#Bayesian t-test (bayesT, same settings as before) with multiple testing correction enabled
arcA.ko.response.park<-bayesT(arcA.ko.park.transcriptomes,numC=3,numE=3,conf=7,doMulttest=TRUE)
#DEGs: BH-adjusted p-value below 0.05 and absolute difference between group means (log2 scale) above 1
park.is.significant<-arcA.ko.response.park$BH < 0.05
park.is.large.change<-abs(arcA.ko.response.park$meanC - arcA.ko.response.park$meanE) > 1
arcA.ko.degs.park<-rownames(arcA.ko.response.park)[which(park.is.significant & park.is.large.change)]
#save file with DEGs
#write.csv(file="../Data/Metabolic_profiles/Campos_Zampieri_data/arcA_KO_DEGs_Park2013.csv",arcA.ko.degs.park)
#identify metabolites whose concentration is altered by the deletion of arcA
#data sourced from Fuhrer et al. (Molecular Systems Biology 2017)
#data was downloaded from the BioStudies database (accession S-BSST5)
#ion-to-KEGG annotation tables (columns "ion" and "id" are used below)
negative.ions.to.metabolites.map<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/neg_kegg_all_3mD.csv")
positive.ions.to.metabolites.map<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/pos_kegg_all_3mD.csv")
#z-scores per ion (rows) for each gene KO (columns)
negative.ions.zscores<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/negative_ions_zscores_gene_kos.csv",header=TRUE,row.names=1)
positive.ions.zscores<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/positive_ions_zscores_gene_kos.csv",header=TRUE,row.names=1)
#stack positive ions on top of negative ions in a single matrix
all.ions.zscores<-rbind(as.matrix(positive.ions.zscores),as.matrix(negative.ions.zscores))
#threshold: 90th percentile of the absolute arcA KO z-scores over all ions
arcA.zscore.threshold<-as.numeric(quantile(abs(all.ions.zscores[,"arcA"]),0.9))
#arcA KO z-scores per ionization mode
arcA.positive.ions.zscores<-positive.ions.zscores[,"arcA"]
arcA.negative.ions.zscores<-negative.ions.zscores[,"arcA"]
#NOTE(review): the which() calls below return row positions within the z-score tables and these are
#matched against the "ion" column of the annotation tables - presumably ion ids coincide with row positions; verify
#decreased metabolites: z-score at or below minus the threshold
arcA.positive.ions.decreased<-which(arcA.positive.ions.zscores <= -arcA.zscore.threshold)
arcA.negative.ions.decreased<-which(arcA.negative.ions.zscores <= -arcA.zscore.threshold)
#map the decreased ions to their KEGG ids
arcA.positive.ions.decreased.kegg<-positive.ions.to.metabolites.map[positive.ions.to.metabolites.map[,"ion"] %in% arcA.positive.ions.decreased,"id"]
arcA.negative.ions.decreased.kegg<-negative.ions.to.metabolites.map[negative.ions.to.metabolites.map[,"ion"] %in% arcA.negative.ions.decreased,"id"]
#combine all KEGG ids
arcA.decreased.metabolites<-union(arcA.positive.ions.decreased.kegg,arcA.negative.ions.decreased.kegg)
#increased metabolites: z-score at or above the threshold
arcA.positive.ions.increased<-which(arcA.positive.ions.zscores >= arcA.zscore.threshold)
arcA.negative.ions.increased<-which(arcA.negative.ions.zscores >= arcA.zscore.threshold)
#map the increased ions to their KEGG ids
arcA.positive.ions.increased.kegg<-positive.ions.to.metabolites.map[positive.ions.to.metabolites.map[,"ion"] %in% arcA.positive.ions.increased,"id"]
arcA.negative.ions.increased.kegg<-negative.ions.to.metabolites.map[negative.ions.to.metabolites.map[,"ion"] %in% arcA.negative.ions.increased,"id"]
#combine all KEGG ids
arcA.increased.metabolites<-union(arcA.negative.ions.increased.kegg,arcA.positive.ions.increased.kegg)
#save metabolite ids in a .csv file
#write.csv(file="../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/arcA_ko_differential_metabolites_kegg_083122.csv",union(arcA.increased.metabolites,arcA.decreased.metabolites))
#generate Fig. S7
#read MetaboAnalyst output files (pathway name in column "X", adjusted p-value in column "FDR")
metaboanalyst.output.sertraline<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/MetaboAnalyst_sertraline_pathway_results_08302022.csv")
metaboanalyst.output.cefpiramide<-read.csv("../Data/Metabolic_profiles/Campos_Zampieri_data/MetaboAnalyst_cefpiramide_pathway_results_08302022.csv")
metaboanalyst.output.arcA.ko<-read.csv("../Data/Metabolic_profiles/Fuhrer_Zampieri_et_al_data/MetaboAnalyst_arcA_KO_result_pathway_083122.csv")
#heatmap palette: one color per FDR interval defined by the breaks of the pheatmap call below
heatmap.colors<-c("black","grey","white")
#keep the pathways with FDR <= 0.25 in each condition
sertraline.is.perturbed<-which(metaboanalyst.output.sertraline[,"FDR"] <= 0.25)
sertraline.perturbed.pathways<-metaboanalyst.output.sertraline[sertraline.is.perturbed,c("X","FDR")]
cefpiramide.is.perturbed<-which(metaboanalyst.output.cefpiramide[,"FDR"] <= 0.25)
cefpiramide.perturbed.pathways<-metaboanalyst.output.cefpiramide[cefpiramide.is.perturbed,c("X","FDR")]
arcA.ko.is.perturbed<-which(metaboanalyst.output.arcA.ko[,"FDR"] <= 0.25)
arcA.ko.perturbed.pathways<-metaboanalyst.output.arcA.ko[arcA.ko.is.perturbed,c("X","FDR")]
#pathways perturbed in at least one condition
perturbed.pathways.combined<-unique(c(as.character(sertraline.perturbed.pathways[,"X"]),
                                      as.character(cefpiramide.perturbed.pathways[,"X"]),
                                      as.character(arcA.ko.perturbed.pathways[,"X"])))
#pathway x condition matrix, initialised to 1 so that unperturbed pathways fall in the last color interval
perturbed.pathways.matrix<-matrix(1,nrow=length(perturbed.pathways.combined),ncol=3,
                                  dimnames=list(perturbed.pathways.combined,c("arcA KO","Sertraline-treated","Cefpiramide-treated")))
#fill out the matrix: copy the FDR of every pathway perturbed in a condition into that condition's column
perturbed.tables.by.condition<-list("Sertraline-treated"=sertraline.perturbed.pathways,
                                    "Cefpiramide-treated"=cefpiramide.perturbed.pathways,
                                    "arcA KO"=arcA.ko.perturbed.pathways)
for(condition in names(perturbed.tables.by.condition))
{
  condition.table<-perturbed.tables.by.condition[[condition]]
  for(pathway in perturbed.pathways.combined)
  {
    if(pathway %in% condition.table[,"X"])
    {
      current.pathway.position<-which(condition.table[,"X"]==pathway)
      perturbed.pathways.matrix[pathway,condition]<-condition.table[current.pathway.position,"FDR"]
    }
  }
}
#plot heatmap (conditions as rows, pathways as columns)
pheatmap(t(perturbed.pathways.matrix),color=heatmap.colors,scale="none",cluster_rows=FALSE,cluster_cols=FALSE,fontsize=8,angle_col=90,breaks=c(0,0.1,0.25,1),main="Fig. S7")
```
5.3 Fig. 5B
```{r}
#assess susceptibility of the WT, WT DarcA, TetR and TetR DarcA strains to sertraline 
#read OD readings from Bioscreen experiments to generate corresponding growth curves
#first, read growth data for WT and WT DarcA treated with a range of sertraline concentrations 
#for each strain/sertraline concentration, there were three biological replicates (each one with three replicates)
#bioscreen experiment was performed for a total of ~25h
#WT strain was cultured in plate # 1 (ie, wells #1-100)
#WT DarcA strain was cultured in plate # 2 (ie, wells #101-200)
growth.data.WT.WTarcA.sertraline<-read.csv("../Data/Sertraline_dilution/WT_and_WT arcA/2022_0222_formatted_data.csv",row.names=1)
#normalize every well (column) by subtracting its own minimum OD reading
#only the first 33 readings (~16h of growth) are kept
growth.data.WT.WTarcA.sertraline.16h.normalized<-vapply(growth.data.WT.WTarcA.sertraline[1:33,,drop=FALSE],
                                                        function(well.readings){well.readings-min(well.readings)},
                                                        numeric(33))
#add column names
colnames(growth.data.WT.WTarcA.sertraline.16h.normalized)<-colnames(growth.data.WT.WTarcA.sertraline)
#maximum normalized OD among blank wells (wells # 92-100 were included as a control for WT, and wells # 192-200 as a control for WT arcA) 
#1st OD reading was excluded in this analysis because it commonly gives a higher than expected reading (ie, higher than the reading taken a few minutes later)
wt.max.blank<-max(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,92:100])
wt.arcA.max.blank<-max(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,192:200])
#wells where growth was observed: max OD > 2x maximum growth detected in blank wells (inoculated with sterile LB)
wt.max.od.per.well<-apply(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,2:90],2,max)
wt.wells.that.grew<-names(which(wt.max.od.per.well>2*wt.max.blank))
wt.arcA.max.od.per.well<-apply(growth.data.WT.WTarcA.sertraline.16h.normalized[-1,102:190],2,max)
wt.arcA.wells.that.grew<-names(which(wt.arcA.max.od.per.well>2*wt.arcA.max.blank))
#estimate WT MIC for sertraline
#well labels are searched with "<strain>_<concentration>" patterns
wt.sertraline.concentrations<-paste("WT_",c(0,seq(30,60,by=5),80),sep="")
#numeric concentration encoded after the "_" of every pattern
wt.tested.doses<-as.numeric(vapply(strsplit(wt.sertraline.concentrations,split="_"),function(label.parts){label.parts[2]},character(1)))
#number of wells that grew at every concentration
wt.grown.wells.per.dose<-vapply(wt.sertraline.concentrations,function(dose.label){length(grep(dose.label,wt.wells.that.grew))},integer(1))
#MIC = lowest concentration with fewer than 2 wells showing growth; defaults to the highest tested concentration (80)
wt.sertraline.mic<-min(c(80,wt.tested.doses[wt.grown.wells.per.dose<2]))
#estimate WT DarcA MIC for sertraline in the same way
wt.arcA.sertraline.concentrations<-paste("WTarcA_",c(0,seq(30,60,by=5),80),sep="") #values in ug/ml
wt.arcA.tested.doses<-as.numeric(vapply(strsplit(wt.arcA.sertraline.concentrations,split="_"),function(label.parts){label.parts[2]},character(1)))
wt.arcA.grown.wells.per.dose<-vapply(wt.arcA.sertraline.concentrations,function(dose.label){length(grep(dose.label,wt.arcA.wells.that.grew))},integer(1))
#default MIC value is 80
wt.arcA.sertraline.mic<-min(c(80,wt.arcA.tested.doses[wt.arcA.grown.wells.per.dose<2]))
print(paste0("WT Sertraline MIC:",wt.sertraline.mic,"ug/ml"))
print(paste0("WT DarcA Sertraline MIC:",wt.arcA.sertraline.mic,"ug/ml"))
#second, read growth data for TetR and TetR DarcA treated with different sertraline concentrations 
#experiment was performed for a total of ~24h
#TetR strain was cultured in plate # 1 (ie, wells #1-100)
#TetR DarcA strain was cultured in plate # 2 (ie, wells #101-200)
growth.data.TetR.TetRarcA.sertraline<-read.csv("../Data/Sertraline_dilution/TetR_and_TetR arcA/2022_0221_formatted_data.csv",row.names=1)
#normalize every well (column) by subtracting its own minimum OD reading
#only the first 33 readings (~16h of growth) are kept
growth.data.TetR.TetRarcA.sertraline.16h.normalized<-vapply(growth.data.TetR.TetRarcA.sertraline[1:33,,drop=FALSE],
                                                            function(well.readings){well.readings-min(well.readings)},
                                                            numeric(33))
#add column names
colnames(growth.data.TetR.TetRarcA.sertraline.16h.normalized)<-colnames(growth.data.TetR.TetRarcA.sertraline)
#maximum normalized OD among blank wells (wells # 92-100 were included as a control for TetR, and wells # 192-200 as a control for TetR arcA) 
#the 1st reading (row) is left out, as indicated by the [-1, ] index
tetR.max.blank<-max(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,92:100])
tetR.arcA.max.blank<-max(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,192:200])
#wells where growth was observed: max OD > 2x maximum growth detected in blank wells (inoculated with sterile LB)
tetR.max.od.per.well<-apply(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,2:90],2,max)
tetR.wells.that.grew<-names(which(tetR.max.od.per.well>2*tetR.max.blank))
tetR.arcA.max.od.per.well<-apply(growth.data.TetR.TetRarcA.sertraline.16h.normalized[-1,102:190],2,max)
tetR.arcA.wells.that.grew<-names(which(tetR.arcA.max.od.per.well>2*tetR.arcA.max.blank))
#estimate TetR MIC for sertraline
#well labels are searched with "<strain>_<concentration>" patterns
tetR.sertraline.concentrations<-paste("TetR_",c(0,seq(20,50,by=5),60),sep="")
#numeric concentration encoded after the "_" of every pattern
tetR.tested.doses<-as.numeric(vapply(strsplit(tetR.sertraline.concentrations,split="_"),function(label.parts){label.parts[2]},character(1)))
#number of wells that grew at every concentration
tetR.grown.wells.per.dose<-vapply(tetR.sertraline.concentrations,function(dose.label){length(grep(dose.label,tetR.wells.that.grew))},integer(1))
#MIC = lowest concentration with fewer than 2 wells showing growth; defaults to the highest tested concentration (60)
tetR.sertraline.mic<-min(c(60,tetR.tested.doses[tetR.grown.wells.per.dose<2]))
#define TetR DarcA MIC for sertraline in the same way
tetR.arcA.sertraline.concentrations<-paste("TetRarcA_",c(0,5,seq(10,20,by=2.5),25,30),sep="") #values in ug/ml
tetR.arcA.tested.doses<-as.numeric(vapply(strsplit(tetR.arcA.sertraline.concentrations,split="_"),function(label.parts){label.parts[2]},character(1)))
tetR.arcA.grown.wells.per.dose<-vapply(tetR.arcA.sertraline.concentrations,function(dose.label){length(grep(dose.label,tetR.arcA.wells.that.grew))},integer(1))
#default MIC value is 30
tetR.arcA.sertraline.mic<-min(c(30,tetR.arcA.tested.doses[tetR.arcA.grown.wells.per.dose<2]))
print(paste0("TetR Sertraline MIC:",tetR.sertraline.mic,"ug/ml"))
print(paste0("TetR DarcA Sertraline MIC:",tetR.arcA.sertraline.mic,"ug/ml"))
#create Fig. 5B
#create vector with all sertraline concentrations (ug/ml) tested among the four strains
sertraline.concentrations<-c(0,5,10,12.5,15,17.5,20,25,30,35,40,45,50,55,60,80)
#define growth pattern (1= growth, 0= no growth, -1: not tested) of each strain in the full range of sertraline concentrations 
#each vector has one entry per element of sertraline.concentrations, in the same order
tetR.dose.response<-c(1,rep(-1,5),rep(1,3),rep(0,4),-1, 0,-1)
tetR.arcA.dose.response<-c(rep(1,5),rep(0,4), rep(-1,7))
wt.dose.response<-c(1, rep(-1,7),rep(1,3),rep(0,5))
wt.arcA.dose.response<- c(1, rep(-1,7),rep(1,4),rep(0,4))
#create dose response matrix (concentrations as rows, strains as columns)
sertraline.dose.response.matrix<-cbind(wt.dose.response, wt.arcA.dose.response, 
                             tetR.dose.response,tetR.arcA.dose.response)
rownames(sertraline.dose.response.matrix)<-sertraline.concentrations
colnames(sertraline.dose.response.matrix)<-c("WT","WT arcA", "TetR","TetR arcA")
#breaks split the range into three intervals so that -1, 0 and 1 each get their own color (grey, purple, green)
heatmap.breaks<-c(-1,-0.33,0.33,1)
#rows are reversed so that the highest concentration is drawn at the top
#the row order is derived from the matrix instead of a hard-coded 16:1, and the
#dangling empty `annotation_row = ` argument of the previous call was dropped
pheatmap(sertraline.dose.response.matrix[nrow(sertraline.dose.response.matrix):1,],scale="none",color= c("grey","#BEAED4","#7FC97F"),
         cluster_rows=FALSE,cluster_cols = FALSE,fontsize = 11,angle_col = 45, breaks = heatmap.breaks,
         legend_breaks=heatmap.breaks,legend=TRUE,main="Fig. 5B")
```
5.4 Compute FIC2 scores of WT for the sertraline-tetracycline combination 
```{r}
#analyze results of DiaMOND assay for the sertraline-tetracycline combination
#function to create a monotonically decreasing dose response curve (as described in the original DiaMOND manuscript, Cokol et al. Science Advances 2017)
#od.vector: numeric vector of OD values ordered by increasing drug concentration
#returns a vector of the same length in which every point that rises above the lowest value kept so far
#is replaced by that value (ie, the running minimum of od.vector); a replaced point also takes the name of the point it copies
#vectors with fewer than two points are returned unchanged (they used to raise an error)
#missing values are not supported and stop with an error at the comparison
create.monotonically.decreasing.vector<-function(od.vector)
{
  number.of.points<-length(od.vector)
  #nothing to flatten in an empty or single-point curve
  if(number.of.points<2)
  {
    return(od.vector)
  }
  #kept.position[i] is the position of od.vector whose value ends up at output position i
  kept.position<-seq_len(number.of.points)
  for(i in 2:number.of.points)
  {
    previous.position<-kept.position[i-1]
    #the previously kept value never exceeds od.vector[i-1], so this single comparison is equivalent
    #to checking the current point against both its raw predecessor and the last kept value
    if(!(od.vector[i]<=od.vector[previous.position]))
    {
      #the curve went up: repeat the last kept value instead
      kept.position[i]<-previous.position
    }
  }
  od.vector[kept.position]
}
#function to express an OD vector relative to its first element (the untreated control)
#od.vector: numeric vector whose first element is the reference value
#returns od.vector divided by od.vector[1], so the first element of the result is 1
normalize.vector<-function(od.vector)
{
  untreated.reference<-od.vector[1]
  od.vector/untreated.reference
}
#function to interpolate IC50 values
#normalized.od.vector: dose response vector (OD normalized to the untreated control)
#abx.concentration.range: set of antibiotic concentrations associated with the dose response vector
#n.points: number of equally spaced concentrations at which the fitted spline is evaluated (default keeps the original resolution)
#returns the concentration whose fitted response is closest to 0.5 (a single value; the lowest such concentration in case of ties)
#returns NA when the dose response vector does not contain values both above and below 0.5,
#instead of failing because the result was never assigned
interpolate.ic50<-function(normalized.od.vector,abx.concentration.range,n.points=10000000)
{
  estimated.ic50<-NA_real_
  #confirm that the normalized vector contains values above and below 0.5 (ie, the IC50 point)
  has.points.below.ic50<-any(normalized.od.vector < 0.5,na.rm=TRUE)
  has.points.above.ic50<-any(normalized.od.vector > 0.5,na.rm=TRUE)
  if(has.points.below.ic50 && has.points.above.ic50)
  {
    #spline fitting
    temporal.spline.fitting<-spline(abx.concentration.range, normalized.od.vector, n=n.points)
    #estimate IC50 as the evaluated concentration whose response is closest to 0.5
    estimated.ic50<-temporal.spline.fitting$x[which.min(abs(temporal.spline.fitting$y-0.5))]
  }
  estimated.ic50
}
#analyze DiaMOND results for WT and WT DarcA strains
#data is in a matrix with 384 columns (each column is a well in the plate)
wt.wt.arcA.biotek.data<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/WT_and_WT arcA/2022_0226_formatted_data.CSV",row.names=1)
#in the 384 well plate:
#rows # 1-5 were used for sertraline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 6-10 were used for tetracycline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 11-15 were used for sertraline-tetracycline combination linear dilutions (ie, 0x-2x by 0.2 increments)
#columns #1 and #24 were blank (LB only) 
#columns #2-12 were inoculated with WT and columns # 13-23 were inoculated with WT arcA KO 
#remove column with temperature data
wt.wt.arcA.biotek.data<-wt.wt.arcA.biotek.data[,-1]
#delta OD of each well: max OD - min OD over the first 33 readings (16h of growth)
#the 16h growth window was defined based on the DiaMOND protocol (Cokol-Cakmak et al. JOVE 2018)
wt.wt.arcA.biotek.delta.od<-vapply(wt.wt.arcA.biotek.data,
                                   function(well.readings){max(well.readings[1:33])-min(well.readings[1:33])},
                                   numeric(1))
names(wt.wt.arcA.biotek.delta.od)<-colnames(wt.wt.arcA.biotek.data)
#extract delta OD values for WT strain
#one row per plate row A-O; each row is a linear dilution (from 0x to 2x by 0.2x increments) in plate columns 2-12
wt.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row){wt.wt.arcA.biotek.delta.od[paste0(plate.row,2:12)]}))
#check delta OD profile and interpolate IC50s
par(mfrow=c(3,3))
concentration.linear.range<-seq(0,2,by=0.2)
wt.ic50.vector<-numeric(nrow(wt.delta.od.matrix))
for(curve.index in seq_len(nrow(wt.delta.od.matrix)))
{
  #monotonically decreasing delta OD vector, then normalized to its first (untreated) point
  current.od.vector<-create.monotonically.decreasing.vector(wt.delta.od.matrix[curve.index,])
  normalized.curve<-normalize.vector(current.od.vector)
  #x-axis label depends on the treatment of the plate row (rows 1-5, 6-10 and 11 onwards)
  if(curve.index<6)
  {
    treatment.label<-"[Sertraline(x IC50)]"
  } else if(curve.index<11)
  {
    treatment.label<-"[Tetracycline(x IC50)]"
  } else
  {
    treatment.label<-"[Tetracycline-Sertraline(x IC50)]"
  }
  #plot delta OD data (ie, effect of treatment on growth) with a guide line at half of the untreated growth
  plot(x=concentration.linear.range,y=normalized.curve,type="o",xlab=treatment.label,ylab='Normalized OD')
  abline(h=0.5,col="grey",lty=2)
  #IC50: linear interpolation of concentration as a function of normalized OD, evaluated at 0.5
  ic50.interpolated.value<-approxfun(x=normalized.curve,y=concentration.linear.range)(0.5)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50 (3 decimals)
  wt.ic50.vector[curve.index]<-round(ic50.interpolated.value,digits=3)
}
#define WT FIC2 for each biological replicate:
#IC50 of the combination divided by 2ab/(a+b), where a and b are the single-drug IC50s of the same replicate
wt.sertraline.ic50<-wt.ic50.vector[1:5]
wt.tetracycline.ic50<-wt.ic50.vector[6:10]
wt.combination.ic50<-wt.ic50.vector[11:15]
wt.fic<-wt.combination.ic50/(2*(wt.sertraline.ic50*wt.tetracycline.ic50)/(wt.sertraline.ic50+wt.tetracycline.ic50))
print(paste0("WT FIC2:",round(geoMean(wt.fic),digits=3)))
```
5.5 Compute FIC2 scores of WT arcA for the sertraline-tetracycline combination
```{r}
#extract delta OD values for the WT arcA strain
#uses wt.wt.arcA.biotek.delta.od and concentration.linear.range, which are not defined in this chunk
#one row per plate row A-O; WT arcA wells are read from plate columns 13-23
wt.arcA.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row){wt.wt.arcA.biotek.delta.od[paste0(plate.row,13:23)]}))
#check delta OD profile and interpolate IC50s
par(mfrow=c(3,3))
wt.arcA.ic50.vector<-numeric(nrow(wt.arcA.delta.od.matrix))
for(curve.index in seq_len(nrow(wt.arcA.delta.od.matrix)))
{
  #monotonically decreasing delta OD vector, then normalized to its first (untreated) point
  current.od.vector<-create.monotonically.decreasing.vector(wt.arcA.delta.od.matrix[curve.index,])
  normalized.curve<-normalize.vector(current.od.vector)
  #x-axis label depends on the treatment of the plate row (rows 1-5, 6-10 and 11 onwards)
  if(curve.index<6)
  {
    treatment.label<-"[Sertraline(x IC50)]"
  } else if(curve.index<11)
  {
    treatment.label<-"[Tetracycline(x IC50)]"
  } else
  {
    treatment.label<-"[Sertraline-Tetracycline(x IC50)]"
  }
  #plot delta OD data with a guide line at half of the untreated growth
  plot(x=concentration.linear.range,y=normalized.curve,type="o",xlab=treatment.label,ylab='Normalized OD')
  abline(h=0.5,col="red",lty=2)
  #IC50: linear interpolation of concentration as a function of normalized OD, evaluated at 0.5
  ic50.interpolated.value<-approxfun(x=normalized.curve,y=concentration.linear.range)(0.5)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50 (3 decimals)
  wt.arcA.ic50.vector[curve.index]<-round(ic50.interpolated.value,digits=3)
}
#compute FIC2 of WT arcA replicates:
#IC50 of the combination divided by 2ab/(a+b), where a and b are the single-drug IC50s of the same replicate
wt.arcA.sertraline.ic50<-wt.arcA.ic50.vector[1:5]
wt.arcA.tetracycline.ic50<-wt.arcA.ic50.vector[6:10]
wt.arcA.combination.ic50<-wt.arcA.ic50.vector[11:15]
wt.arcA.fic<-wt.arcA.combination.ic50/(2*(wt.arcA.sertraline.ic50*wt.arcA.tetracycline.ic50)/(wt.arcA.sertraline.ic50+wt.arcA.tetracycline.ic50))
print(paste0("WT arcA FIC2:",round(geoMean(wt.arcA.fic),digits=3)))
```
5.6 Compute FIC2 scores of TetR arcA for the sertraline-tetracycline combination
```{r}
#analyze DiaMOND results of the TetR and TetR DarcA strains
#data is in a matrix with 384 columns (each column is a well in a plate)
tetR.tetR.arcA.biotek.data<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/TetR_and_TetR arcA/run1/2022_0301_formatted_data.CSV",row.names=1)
#in the 384 well plate:
#rows # 1-5 were used for sertraline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 6-10 were used for tetracycline linear dilutions (ie, 0x-2x by 0.2 increments)
#rows # 11-15 were used for sertraline-tetracycline combination linear dilutions (ie, 0x-2x by 0.2 increments)
#columns #1 and #24 were blank (LB only) 
#columns #2-12 were inoculated with TetR and columns # 13-23 were inoculated with TetR arcA KO 
#remove column with temperature data
tetR.tetR.arcA.biotek.data<-tetR.tetR.arcA.biotek.data[,-1]
#delta OD of each well: max OD - min OD over the first 33 readings (16h of growth)
#the 16h growth window was defined based on the DiaMOND protocol (Cokol-Cakmak et al. JOVE 2018)
tetR.tetR.arcA.delta.od<-vapply(tetR.tetR.arcA.biotek.data,
                                function(well.readings){max(well.readings[1:33])-min(well.readings[1:33])},
                                numeric(1))
names(tetR.tetR.arcA.delta.od)<-colnames(tetR.tetR.arcA.biotek.data)
#extract delta OD values for TetR arcA strain
#one row per plate row A-O; each row is a linear dilution (from 0x to 2x by 0.2x increments) in plate columns 13-23
tetR.arcA.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row){tetR.tetR.arcA.delta.od[paste0(plate.row,13:23)]}))
#check delta OD profile and interpolate IC50s
#uses concentration.linear.range, which is not defined in this chunk
par(mfrow=c(3,3))
tetR.arcA.ic50.vector<-numeric(nrow(tetR.arcA.delta.od.matrix))
for(curve.index in seq_len(nrow(tetR.arcA.delta.od.matrix)))
{
  #monotonically decreasing delta OD vector, then normalized to its first (untreated) point
  current.od.vector<-create.monotonically.decreasing.vector(tetR.arcA.delta.od.matrix[curve.index,])
  normalized.curve<-normalize.vector(current.od.vector)
  #x-axis label depends on the treatment of the plate row (rows 1-5, 6-10 and 11 onwards)
  if(curve.index<6)
  {
    treatment.label<-"[Sertraline(x IC50)]"
  } else if(curve.index<11)
  {
    treatment.label<-"[Tetracycline(x IC50)]"
  } else
  {
    treatment.label<-"[Sertraline-Tetracycline(x IC50)]"
  }
  #plot delta OD data with a guide line at half of the untreated growth
  plot(x=concentration.linear.range,y=normalized.curve,type="o",xlab=treatment.label,ylab='Normalized OD')
  abline(h=0.5,col="red",lty=2)
  #IC50: linear interpolation of concentration as a function of normalized OD, evaluated at 0.5
  ic50.interpolated.value<-approxfun(x=normalized.curve,y=concentration.linear.range)(0.5)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50 (3 decimals)
  tetR.arcA.ic50.vector[curve.index]<-round(ic50.interpolated.value,digits=3)
}
#define FIC2 for TetR arcA replicates:
#IC50 of the combination divided by 2ab/(a+b), where a and b are the single-drug IC50s of the same replicate
tetR.arcA.sertraline.ic50<-tetR.arcA.ic50.vector[1:5]
tetR.arcA.tetracycline.ic50<-tetR.arcA.ic50.vector[6:10]
tetR.arcA.combination.ic50<-tetR.arcA.ic50.vector[11:15]
tetR.arcA.fic<-tetR.arcA.combination.ic50/(2*(tetR.arcA.sertraline.ic50*tetR.arcA.tetracycline.ic50)/(tetR.arcA.sertraline.ic50+tetR.arcA.tetracycline.ic50))
print(paste0("TetR arcA FIC2:",round(geoMean(tetR.arcA.fic),digits=3)))
```
5.7 Compute FIC2 scores of TetR for the sertraline-tetracycline combination
```{r}
#extract delta OD values for TetR strain (wells in plate rows A-O, plate columns 2-12)
#NOTE: replicates with more than one potential IC50 (on the raw data) or increased growth after potential IC50 were not used for FIC2 calculation
tetR.delta.od.matrix<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row)
{
  #well names are the plate row letter followed by the plate column number (e.g., A2)
  tetR.tetR.arcA.delta.od[paste0(plate.row,2:12)]
}))
#check delta OD profiles and interpolate IC50s for the TetR strain (experiment #1)
#matrix rows 1-5 are labelled sertraline, rows 6-10 tetracycline and rows 11-15 the sertraline-tetracycline combination
par(mfrow=c(3,3))
tetR.ic50.vector<-c()
for(r in seq_len(nrow(tetR.delta.od.matrix)))
{
  #generate monotonically decreasing delta OD vector and normalize it once (reused for the plot and the interpolation)
  current.od.vector<-create.monotonically.decreasing.vector(tetR.delta.od.matrix[r,])
  normalized.od.vector<-normalize.vector(current.od.vector)
  #the three treatments share the same plot and only differ in the x-axis label
  if(r<=5)
  {
    treatment.label<-"[Sertraline(x IC50)]"
  } else if(r<=10)
  {
    treatment.label<-"[Tetracycline(x IC50)]"
  } else
  {
    treatment.label<-"[Sertraline-Tetracycline(x IC50)]"
  }
  #we first plot the raw dose-response curve (black line)
  plot(x=concentration.linear.range,y=normalize.vector(tetR.delta.od.matrix[r,]),type="l",
       xlab=treatment.label,ylab='Normalized OD',main="Expt1")
  #dashed red line marks the half-maximal normalized OD used to read the IC50
  abline(h=0.5,col="red",lty=2)
  #also plot monotonically decreasing dose response curve (orange line)
  points(x=concentration.linear.range,y=normalized.od.vector,type="o",col="orange")
  #define IC50 using the in-house spline fitting function (more accurate than approxfun)
  ic50.interpolated.value<-interpolate.ic50(normalized.od.vector,concentration.linear.range)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50 (rounded to 3 digits)
  tetR.ic50.vector<-c(tetR.ic50.vector,round(ic50.interpolated.value,digits=3))
}
#replicates #1, #3 and #4 were not used for FIC2 estimation after visual inspection of the sertraline dose response curves
#a second experiment (below) was performed to increase the number of biological replicates
#define FIC2 of TetR replicates in experiment #1
#for replicate i: combination IC50 (entry i+10) divided by the harmonic mean of the two single-drug IC50s (entries i and i+5)
tetR.fic.expt1<-local({
  first.drug.ic50<-tetR.ic50.vector[1:5]
  second.drug.ic50<-tetR.ic50.vector[6:10]
  combination.ic50<-tetR.ic50.vector[11:15]
  all.replicates.fic<-combination.ic50/(2*(first.drug.ic50*second.drug.ic50)/(first.drug.ic50+second.drug.ic50))
  #keep only replicates #2 and #5
  all.replicates.fic[c(2,5)]
})
#in the 384 well plate of the second DiaMOND experiment:
#rows #1-5 were used for sertraline linear dilutions (listed as 0x,.2x,.4x,.6x,.8x,1.0x,1.2x,1.6x,1.4x,1.8x,2.0x)
#rows #6-10 were used for tetracycline linear dilutions (i.e., 0x-2x by 0.2x increments)
#rows #11-15 were used for sertraline-tetracycline combination linear dilutions (i.e., 0x-2x by 0.2x increments)
#columns #1, #13-15 were blank (LB only)
#columns #2-12 were inoculated with TetR
#read data collected on a second DiaMOND experiment for TetR (five biological replicates)
#the first data column (temperature) is dropped right after reading
tetR.biotek.data.expt2<-read.csv("../Data/DiaMOND_assay/Sertraline_Tetracycline/TetR_and_TetR arcA/run2/2022_0413_formatted_data.CSV",row.names=1)[,-1]
#extract delta OD values: for every well (column), maximum minus minimum OD over rows 1-33
tetR.biotek.delta.od.expt2<-vapply(tetR.biotek.data.expt2[1:33,],function(well.od)
{
  max(well.od)-min(well.od)
},numeric(1))
#label each delta OD with its well name
names(tetR.biotek.delta.od.expt2)<-colnames(tetR.biotek.data.expt2)
#define tetR delta OD matrix (expt #2) from wells in plate rows A-O, plate columns 2-12
#each matrix row holds the 11 wells of one dilution series
tetR.delta.od.matrix.expt2<-do.call(rbind,lapply(LETTERS[1:15],function(plate.row)
{
  #well names are the plate row letter followed by the plate column number (e.g., A2)
  tetR.biotek.delta.od.expt2[paste0(plate.row,2:12)]
}))
#check delta OD profiles and interpolate IC50s for the TetR strain (experiment #2)
#matrix rows 1-5 are labelled sertraline, rows 6-10 tetracycline and rows 11-15 the sertraline-tetracycline combination
par(mfrow=c(3,3))
tetR.ic50.vector.expt2<-c()
for(r in seq_len(nrow(tetR.delta.od.matrix.expt2)))
{
  raw.od.vector<-tetR.delta.od.matrix.expt2[r,]
  #generate monotonically decreasing delta OD vector and normalize it once (reused for the plot and the interpolation)
  current.od.vector<-create.monotonically.decreasing.vector(raw.od.vector)
  normalized.od.vector<-normalize.vector(current.od.vector)
  #select the raw curve to draw; the treatments differ in label, line type and (for sertraline) point order
  if(r<=5)
  {
    #NOTE(review): x and y are permuted with the same index, so every point keeps its original
    #(concentration, OD) pairing and only the drawing order of points 8 and 9 changes; the monotonic
    #curve and the IC50 below use the unpermuted plate order. If wells 8 and 9 really hold 1.6x and
    #1.4x sertraline (as listed in the plate layout comment), only one of x/y should be permuted and
    #the same correction applied before interpolating the IC50 - confirm against the plate map.
    well.order<-c(1:7,9,8,10:11)
    raw.curve.x<-concentration.linear.range[well.order]
    raw.curve.y<-normalize.vector(raw.od.vector[well.order])
    raw.curve.type<-"l"
    treatment.label<-"[Sertraline(x IC50)]"
  } else if(r<=10)
  {
    raw.curve.x<-concentration.linear.range
    raw.curve.y<-normalize.vector(raw.od.vector)
    raw.curve.type<-"l"
    treatment.label<-"[Tetracycline(x IC50)]"
  } else
  {
    raw.curve.x<-concentration.linear.range
    raw.curve.y<-normalize.vector(raw.od.vector)
    raw.curve.type<-"o"
    treatment.label<-"[Sertraline-Tetracycline(x IC50)]"
  }
  #plot the raw dose-response curve
  plot(x=raw.curve.x,y=raw.curve.y,type=raw.curve.type,
       xlab=treatment.label,ylab='Normalized OD',main="Expt2")
  #dashed red line marks the half-maximal normalized OD used to read the IC50
  abline(h=0.5,col="red",lty=2)
  #also plot monotonically decreasing version of the dose response (orange line)
  points(x=concentration.linear.range,y=normalized.od.vector,type="o",col="orange")
  #define IC50 using the in-house spline fitting function
  ic50.interpolated.value<-interpolate.ic50(normalized.od.vector,concentration.linear.range)
  #add estimated IC50 to figure
  points(x=ic50.interpolated.value,y=0.5,col="red",pch=2)
  #save estimated IC50 (rounded to 3 digits)
  tetR.ic50.vector.expt2<-c(tetR.ic50.vector.expt2,round(ic50.interpolated.value,digits=3))
}
#define FIC2 of TetR replicates in expt #2
#for replicate i: combination IC50 (entry i+10) divided by the harmonic mean of the two single-drug IC50s (entries i and i+5)
tetR.fic.expt2<-local({
  first.drug.ic50<-tetR.ic50.vector.expt2[1:5]
  second.drug.ic50<-tetR.ic50.vector.expt2[6:10]
  combination.ic50<-tetR.ic50.vector.expt2[11:15]
  all.replicates.fic<-combination.ic50/(2*(first.drug.ic50*second.drug.ic50)/(first.drug.ic50+second.drug.ic50))
  #remove 5th replicate
  all.replicates.fic[1:4]
})
#combine TetR FIC2 from both experiments
#c() is used instead of union(): union() discards duplicated values, so two replicates that
#happen to share the same FIC2 would be collapsed into one and bias the geometric mean and Fig. 5C
tetR.fic<-c(tetR.fic.expt1,tetR.fic.expt2)
#report the geometric mean of the pooled replicate FIC2 values
print(paste0("TetR FIC2:",round(geoMean(tetR.fic),digits=3)))
```
5.8 Fig. 5C
```{r}
#boxplot of the replicate FIC2 values of the four strains (one box per strain)
boxplot(list(wt.fic,wt.arcA.fic,tetR.fic,tetR.arcA.fic),
        names=c("WT","WT arcA", "TetR","TetR arcA"),
        col=strain.colors[1:4],
        ylab="FIC2",ylim=c(0.5,1.3),main="Fig. 5C",
        las=2,cex.axis=1.1,cex.lab=1.1)
#dashed reference line at FIC2 = 1
abline(h=1,col="darkgrey",lty=2)
```